LoongArchISelLowering.cpp
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "loongarch-isel-lowering"
42
43STATISTIC(NumTailCalls, "Number of tail calls");
44
53
55 "loongarch-materialize-float-imm", cl::Hidden,
56 cl::desc("Maximum number of instructions used (including code sequence "
57 "to generate the value and moving the value to FPR) when "
58 "materializing floating-point immediates (default = 3)"),
60 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62 "Materialize FP immediate within 2 instructions"),
64 "Materialize FP immediate within 3 instructions"),
66 "Materialize FP immediate within 4 instructions"),
68 "Materialize FP immediate within 5 instructions"),
70 "Materialize FP immediate within 6 instructions "
71 "(behaves same as 5 on loongarch64)")));
72
73static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
74 cl::desc("Trap on integer division by zero."),
75 cl::init(false));
76
78 const LoongArchSubtarget &STI)
79 : TargetLowering(TM), Subtarget(STI) {
80
81 MVT GRLenVT = Subtarget.getGRLenVT();
82
83 // Set up the register classes.
84
85 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
86 if (Subtarget.hasBasicF())
87 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
88 if (Subtarget.hasBasicD())
89 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
90
91 static const MVT::SimpleValueType LSXVTs[] = {
92 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
93 static const MVT::SimpleValueType LASXVTs[] = {
94 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
95
96 if (Subtarget.hasExtLSX())
97 for (MVT VT : LSXVTs)
98 addRegisterClass(VT, &LoongArch::LSX128RegClass);
99
100 if (Subtarget.hasExtLASX())
101 for (MVT VT : LASXVTs)
102 addRegisterClass(VT, &LoongArch::LASX256RegClass);
103
104 // Set operations for LA32 and LA64.
105
107 MVT::i1, Promote);
108
115
118 GRLenVT, Custom);
119
121
122 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
123 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
126
127 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
128 setOperationAction(ISD::TRAP, MVT::Other, Legal);
129
133
134 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
135
136 // BITREV/REVB requires the 32S feature.
137 if (STI.has32S()) {
138 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
139 // we know whether sll or revb.2h is faster.
142
143 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
144 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
145 // and i32 could still be byte-swapped relatively cheaply.
147 } else {
155 }
156
157 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
159 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
163
166
167 // Set operations for LA64 only.
168
169 if (Subtarget.is64Bit()) {
176 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
187
191 Custom);
192 setOperationAction(ISD::LROUND, MVT::i32, Custom);
193 }
194
195 // Set operations for LA32 only.
196
197 if (!Subtarget.is64Bit()) {
203 if (Subtarget.hasBasicD())
204 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
205 }
206
207 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
208
209 static const ISD::CondCode FPCCToExpand[] = {
212
213 // Set operations for 'F' feature.
214
215 if (Subtarget.hasBasicF()) {
216 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
217 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
218 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
219 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
220 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
221
224 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
226 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
227 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
228 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
229 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
234 setOperationAction(ISD::FSIN, MVT::f32, Expand);
235 setOperationAction(ISD::FCOS, MVT::f32, Expand);
236 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
237 setOperationAction(ISD::FPOW, MVT::f32, Expand);
239 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
240 Subtarget.isSoftFPABI() ? LibCall : Custom);
241 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
242 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
244 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
245 Subtarget.isSoftFPABI() ? LibCall : Custom);
246
247 if (Subtarget.is64Bit())
248 setOperationAction(ISD::FRINT, MVT::f32, Legal);
249
250 if (!Subtarget.hasBasicD()) {
252 if (Subtarget.is64Bit()) {
255 }
256 }
257 }
258
259 // Set operations for 'D' feature.
260
261 if (Subtarget.hasBasicD()) {
262 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
265 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
268 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
269
272 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
276 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
277 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
278 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
280 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
282 setOperationAction(ISD::FSIN, MVT::f64, Expand);
283 setOperationAction(ISD::FCOS, MVT::f64, Expand);
284 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
285 setOperationAction(ISD::FPOW, MVT::f64, Expand);
287 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
288 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
289 Subtarget.isSoftFPABI() ? LibCall : Custom);
290 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
291 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
292 Subtarget.isSoftFPABI() ? LibCall : Custom);
293
294 if (Subtarget.is64Bit())
295 setOperationAction(ISD::FRINT, MVT::f64, Legal);
296 }
297
298 // Set operations for 'LSX' feature.
299
300 if (Subtarget.hasExtLSX()) {
302 // Expand all truncating stores and extending loads.
303 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
304 setTruncStoreAction(VT, InnerVT, Expand);
307 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
308 }
309 // By default everything must be expanded. Then we will selectively turn
310 // on ones that can be effectively codegen'd.
311 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
313 }
314
315 for (MVT VT : LSXVTs) {
316 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
317 setOperationAction(ISD::BITCAST, VT, Legal);
319
323
328 }
329 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
332 Legal);
334 VT, Legal);
341 Expand);
349 }
350 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
352 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
354 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
357 }
358 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
362 setOperationAction(ISD::FSQRT, VT, Legal);
363 setOperationAction(ISD::FNEG, VT, Legal);
366 VT, Expand);
368 }
370 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
371 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
372 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
373 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
374
375 for (MVT VT :
376 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
377 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
379 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
380 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
381 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
382 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
383 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
384 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
385 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
386 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
387 }
388 }
389
390 // Set operations for 'LASX' feature.
391
392 if (Subtarget.hasExtLASX()) {
393 for (MVT VT : LASXVTs) {
394 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
395 setOperationAction(ISD::BITCAST, VT, Legal);
397
403
407 }
408 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
411 Legal);
413 VT, Legal);
420 Expand);
428 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
429 }
430 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
432 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
434 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
437 }
438 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
442 setOperationAction(ISD::FSQRT, VT, Legal);
443 setOperationAction(ISD::FNEG, VT, Legal);
446 VT, Expand);
448 }
449 }
450
451 // Set DAG combine for LA32 and LA64.
452
457
458 // Set DAG combine for 'LSX' feature.
459
460 if (Subtarget.hasExtLSX()) {
462 setTargetDAGCombine(ISD::BITCAST);
463 }
464
465 // Set DAG combine for 'LASX' feature.
466
467 if (Subtarget.hasExtLASX())
469
470 // Compute derived properties from the register classes.
471 computeRegisterProperties(Subtarget.getRegisterInfo());
472
474
477
478 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
479
481
482 // Function alignments.
484 // Set preferred alignments.
485 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
486 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
487 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
488
489 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
490 if (Subtarget.hasLAMCAS())
492
493 if (Subtarget.hasSCQ()) {
495 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
496 }
497}
498
500 const GlobalAddressSDNode *GA) const {
501 // In order to maximise the opportunity for common subexpression elimination,
502 // keep a separate ADD node for the global address offset instead of folding
503 // it in the global address node. Later peephole optimisations may choose to
504 // fold it back in when profitable.
505 return false;
506}
507
509 SelectionDAG &DAG) const {
510 switch (Op.getOpcode()) {
511 case ISD::ATOMIC_FENCE:
512 return lowerATOMIC_FENCE(Op, DAG);
514 return lowerEH_DWARF_CFA(Op, DAG);
516 return lowerGlobalAddress(Op, DAG);
518 return lowerGlobalTLSAddress(Op, DAG);
520 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
522 return lowerINTRINSIC_W_CHAIN(Op, DAG);
524 return lowerINTRINSIC_VOID(Op, DAG);
526 return lowerBlockAddress(Op, DAG);
527 case ISD::JumpTable:
528 return lowerJumpTable(Op, DAG);
529 case ISD::SHL_PARTS:
530 return lowerShiftLeftParts(Op, DAG);
531 case ISD::SRA_PARTS:
532 return lowerShiftRightParts(Op, DAG, true);
533 case ISD::SRL_PARTS:
534 return lowerShiftRightParts(Op, DAG, false);
536 return lowerConstantPool(Op, DAG);
537 case ISD::FP_TO_SINT:
538 return lowerFP_TO_SINT(Op, DAG);
539 case ISD::BITCAST:
540 return lowerBITCAST(Op, DAG);
541 case ISD::UINT_TO_FP:
542 return lowerUINT_TO_FP(Op, DAG);
543 case ISD::SINT_TO_FP:
544 return lowerSINT_TO_FP(Op, DAG);
545 case ISD::VASTART:
546 return lowerVASTART(Op, DAG);
547 case ISD::FRAMEADDR:
548 return lowerFRAMEADDR(Op, DAG);
549 case ISD::RETURNADDR:
550 return lowerRETURNADDR(Op, DAG);
552 return lowerWRITE_REGISTER(Op, DAG);
554 return lowerINSERT_VECTOR_ELT(Op, DAG);
556 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
558 return lowerBUILD_VECTOR(Op, DAG);
560 return lowerCONCAT_VECTORS(Op, DAG);
562 return lowerVECTOR_SHUFFLE(Op, DAG);
563 case ISD::BITREVERSE:
564 return lowerBITREVERSE(Op, DAG);
566 return lowerSCALAR_TO_VECTOR(Op, DAG);
567 case ISD::PREFETCH:
568 return lowerPREFETCH(Op, DAG);
569 case ISD::SELECT:
570 return lowerSELECT(Op, DAG);
571 case ISD::BRCOND:
572 return lowerBRCOND(Op, DAG);
573 case ISD::FP_TO_FP16:
574 return lowerFP_TO_FP16(Op, DAG);
575 case ISD::FP16_TO_FP:
576 return lowerFP16_TO_FP(Op, DAG);
577 case ISD::FP_TO_BF16:
578 return lowerFP_TO_BF16(Op, DAG);
579 case ISD::BF16_TO_FP:
580 return lowerBF16_TO_FP(Op, DAG);
581 case ISD::VECREDUCE_ADD:
582 return lowerVECREDUCE_ADD(Op, DAG);
583 case ISD::VECREDUCE_AND:
584 case ISD::VECREDUCE_OR:
585 case ISD::VECREDUCE_XOR:
586 case ISD::VECREDUCE_SMAX:
587 case ISD::VECREDUCE_SMIN:
588 case ISD::VECREDUCE_UMAX:
589 case ISD::VECREDUCE_UMIN:
590 return lowerVECREDUCE(Op, DAG);
591 case ISD::ConstantFP:
592 return lowerConstantFP(Op, DAG);
593 }
594 return SDValue();
595}
596
597SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
598 SelectionDAG &DAG) const {
599 EVT VT = Op.getValueType();
601 const APFloat &FPVal = CFP->getValueAPF();
602 SDLoc DL(CFP);
603
604 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
605 (VT == MVT::f64 && Subtarget.hasBasicD()));
606
607 // If value is 0.0 or -0.0, just ignore it.
608 if (FPVal.isZero())
609 return SDValue();
610
611 // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
612 if (isFPImmVLDILegal(FPVal, VT))
613 return SDValue();
614
615 // Construct as integer, and move to float register.
616 APInt INTVal = FPVal.bitcastToAPInt();
617
618 // If more than MaterializeFPImmInsNum instructions would be used to
619 // generate INTVal and move it to a float register, fall back to a
620 // floating-point load from the constant pool.
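  // (As computed below, the GPR->FPR move adds one instruction on LA64 or for
  // f32, and two instructions for f64 on LA32, on top of Seq.size(); the
  // budget is controlled by -loongarch-materialize-float-imm, default 3.)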
622 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
623 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
624 return SDValue();
625
626 switch (VT.getSimpleVT().SimpleTy) {
627 default:
628 llvm_unreachable("Unexpected floating point type!");
629 break;
630 case MVT::f32: {
631 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
632 if (Subtarget.is64Bit())
633 NewVal = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, NewVal);
634 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
636 DL, VT, NewVal);
637 }
638 case MVT::f64: {
639 if (Subtarget.is64Bit()) {
640 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
641 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
642 }
643 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
644 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
645 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
646 }
647 }
648
649 return SDValue();
650}
651
652// Lower vecreduce_add using vhaddw instructions.
653// For Example:
654// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
655// can be lowered to:
656// VHADDW_D_W vr0, vr0, vr0
657// VHADDW_Q_D vr0, vr0, vr0
658// VPICKVE2GR_D a0, vr0, 0
659// ADDI_W a0, a0, 0
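// For a 256-bit LASX input the same VHADDW chain runs on 256-bit types, and
// the two 128-bit halves are then combined with an XVPERMI + ADD before the
// final element extract.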
660SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
661 SelectionDAG &DAG) const {
662
663 SDLoc DL(Op);
664 MVT OpVT = Op.getSimpleValueType();
665 SDValue Val = Op.getOperand(0);
666
667 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
668 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
669 unsigned ResBits = OpVT.getScalarSizeInBits();
670
671 unsigned LegalVecSize = 128;
672 bool isLASX256Vector =
673 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
674
675 // Ensure the operand type is legal, widening it until it is.
676 while (!isTypeLegal(Val.getSimpleValueType())) {
677 Val = DAG.WidenVector(Val, DL);
678 }
679
680 // NumEles determines the iteration count; v4i32 for LSX and v8i32 for
681 // LASX should end up with the same count.
682 if (isLASX256Vector) {
683 NumEles /= 2;
684 LegalVecSize = 256;
685 }
686
687 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
688 MVT IntTy = MVT::getIntegerVT(EleBits);
689 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
690 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
691 }
692
693 if (isLASX256Vector) {
694 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
695 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
696 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
697 }
698
699 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
700 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
701 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
702}
703
704// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
705// For Example:
706// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
707// can be lowered to:
708// VBSRL_V vr1, vr0, 8
709// VMAX_W vr0, vr1, vr0
710// VBSRL_V vr1, vr0, 4
711// VMAX_W vr0, vr1, vr0
712// VPICKVE2GR_W a0, vr0, 0
713 // A 256-bit vector is illegal here and is split into two 128-bit
714 // vectors by default, which are then processed by this function.
715SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
716 SelectionDAG &DAG) const {
717 SDLoc DL(Op);
718
719 MVT OpVT = Op.getSimpleValueType();
720 SDValue Val = Op.getOperand(0);
721
722 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
723 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
724
725 // Ensure the operand type is legal, widening it until it is.
726 while (!isTypeLegal(Val.getSimpleValueType())) {
727 Val = DAG.WidenVector(Val, DL);
728 }
729
730 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
731 MVT VecTy = Val.getSimpleValueType();
732 MVT GRLenVT = Subtarget.getGRLenVT();
733
734 for (int i = NumEles; i > 1; i /= 2) {
735 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
736 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
737 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
738 }
739
740 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
741 DAG.getConstant(0, DL, GRLenVT));
742}
743
744SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
745 SelectionDAG &DAG) const {
746 unsigned IsData = Op.getConstantOperandVal(4);
747
748 // We don't support non-data prefetch.
749 // Just preserve the chain.
750 if (!IsData)
751 return Op.getOperand(0);
752
753 return Op;
754}
755
756// Return true if Val is equal to (setcc LHS, RHS, CC).
757// Return false if Val is the inverse of (setcc LHS, RHS, CC).
758// Otherwise, return std::nullopt.
759static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
760 ISD::CondCode CC, SDValue Val) {
761 assert(Val->getOpcode() == ISD::SETCC);
762 SDValue LHS2 = Val.getOperand(0);
763 SDValue RHS2 = Val.getOperand(1);
764 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
765
766 if (LHS == LHS2 && RHS == RHS2) {
767 if (CC == CC2)
768 return true;
769 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
770 return false;
771 } else if (LHS == RHS2 && RHS == LHS2) {
773 if (CC == CC2)
774 return true;
775 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
776 return false;
777 }
778
779 return std::nullopt;
780}
781
783 const LoongArchSubtarget &Subtarget) {
784 SDValue CondV = N->getOperand(0);
785 SDValue TrueV = N->getOperand(1);
786 SDValue FalseV = N->getOperand(2);
787 MVT VT = N->getSimpleValueType(0);
788 SDLoc DL(N);
789
790 // (select c, -1, y) -> -c | y
791 if (isAllOnesConstant(TrueV)) {
792 SDValue Neg = DAG.getNegative(CondV, DL, VT);
793 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
794 }
795 // (select c, y, -1) -> (c-1) | y
796 if (isAllOnesConstant(FalseV)) {
797 SDValue Neg =
798 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
799 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
800 }
801
802 // (select c, 0, y) -> (c-1) & y
803 if (isNullConstant(TrueV)) {
804 SDValue Neg =
805 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
806 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
807 }
808 // (select c, y, 0) -> -c & y
809 if (isNullConstant(FalseV)) {
810 SDValue Neg = DAG.getNegative(CondV, DL, VT);
811 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
812 }
813
814 // select c, ~x, x --> xor -c, x
815 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
816 const APInt &TrueVal = TrueV->getAsAPIntVal();
817 const APInt &FalseVal = FalseV->getAsAPIntVal();
818 if (~TrueVal == FalseVal) {
819 SDValue Neg = DAG.getNegative(CondV, DL, VT);
820 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
821 }
822 }
823
824 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
825 // when both truev and falsev are also setcc.
826 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
827 FalseV.getOpcode() == ISD::SETCC) {
828 SDValue LHS = CondV.getOperand(0);
829 SDValue RHS = CondV.getOperand(1);
830 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
831
832 // (select x, x, y) -> x | y
833 // (select !x, x, y) -> x & y
834 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
835 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
836 DAG.getFreeze(FalseV));
837 }
838 // (select x, y, x) -> x & y
839 // (select !x, y, x) -> x | y
840 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
841 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
842 DAG.getFreeze(TrueV), FalseV);
843 }
844 }
845
846 return SDValue();
847}
848
849// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
850// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
851// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
852// being `0` or `-1`. In such cases we can replace `select` with `and`.
853// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
854// than `c0`?
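// Illustrative example: (add (select cond, x, -1), 1) becomes
// (select cond, (add x, 1), 0) since -1 + 1 == 0, and the resulting select
// can then be turned into an AND by combineSelectToBinOp.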
855static SDValue
857 const LoongArchSubtarget &Subtarget) {
858 unsigned SelOpNo = 0;
859 SDValue Sel = BO->getOperand(0);
860 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
861 SelOpNo = 1;
862 Sel = BO->getOperand(1);
863 }
864
865 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
866 return SDValue();
867
868 unsigned ConstSelOpNo = 1;
869 unsigned OtherSelOpNo = 2;
870 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
871 ConstSelOpNo = 2;
872 OtherSelOpNo = 1;
873 }
874 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
875 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
876 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
877 return SDValue();
878
879 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
880 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
881 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
882 return SDValue();
883
884 SDLoc DL(Sel);
885 EVT VT = BO->getValueType(0);
886
887 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
888 if (SelOpNo == 1)
889 std::swap(NewConstOps[0], NewConstOps[1]);
890
891 SDValue NewConstOp =
892 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
893 if (!NewConstOp)
894 return SDValue();
895
896 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
897 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
898 return SDValue();
899
900 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
901 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
902 if (SelOpNo == 1)
903 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
904 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
905
906 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
907 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
908 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
909}
910
911// Changes the condition code and swaps operands if necessary, so the SetCC
912// operation matches one of the comparisons supported directly by branches
913// in the LoongArch ISA. May adjust compares to favor compare with 0 over
914// compare with 1/-1.
916 ISD::CondCode &CC, SelectionDAG &DAG) {
917 // If this is a single bit test that can't be handled by ANDI, shift the
918 // bit to be tested to the MSB and perform a signed compare with 0.
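  // Illustrative example (GRLen = 64): ((x & 0x1000) == 0) cannot use ANDI
  // because 0x1000 does not fit in a 12-bit signed immediate, so it is
  // rewritten as ((x << 51) >= 0), moving bit 12 into the sign bit.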
919 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
920 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
921 isa<ConstantSDNode>(LHS.getOperand(1))) {
922 uint64_t Mask = LHS.getConstantOperandVal(1);
923 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
924 unsigned ShAmt = 0;
925 if (isPowerOf2_64(Mask)) {
926 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
927 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
928 } else {
929 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
930 }
931
932 LHS = LHS.getOperand(0);
933 if (ShAmt != 0)
934 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
935 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
936 return;
937 }
938 }
939
940 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
941 int64_t C = RHSC->getSExtValue();
942 switch (CC) {
943 default:
944 break;
945 case ISD::SETGT:
946 // Convert X > -1 to X >= 0.
947 if (C == -1) {
948 RHS = DAG.getConstant(0, DL, RHS.getValueType());
949 CC = ISD::SETGE;
950 return;
951 }
952 break;
953 case ISD::SETLT:
954 // Convert X < 1 to 0 >= X.
955 if (C == 1) {
956 RHS = LHS;
957 LHS = DAG.getConstant(0, DL, RHS.getValueType());
958 CC = ISD::SETGE;
959 return;
960 }
961 break;
962 }
963 }
964
965 switch (CC) {
966 default:
967 break;
968 case ISD::SETGT:
969 case ISD::SETLE:
970 case ISD::SETUGT:
971 case ISD::SETULE:
973 std::swap(LHS, RHS);
974 break;
975 }
976}
977
978SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
979 SelectionDAG &DAG) const {
980 SDValue CondV = Op.getOperand(0);
981 SDValue TrueV = Op.getOperand(1);
982 SDValue FalseV = Op.getOperand(2);
983 SDLoc DL(Op);
984 MVT VT = Op.getSimpleValueType();
985 MVT GRLenVT = Subtarget.getGRLenVT();
986
987 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
988 return V;
989
990 if (Op.hasOneUse()) {
991 unsigned UseOpc = Op->user_begin()->getOpcode();
992 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
993 SDNode *BinOp = *Op->user_begin();
994 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
995 DAG, Subtarget)) {
996 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
997 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
998 // may return a constant node and cause a crash in lowerSELECT.
999 if (NewSel.getOpcode() == ISD::SELECT)
1000 return lowerSELECT(NewSel, DAG);
1001 return NewSel;
1002 }
1003 }
1004 }
1005
1006 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1007 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1008 // (select condv, truev, falsev)
1009 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1010 if (CondV.getOpcode() != ISD::SETCC ||
1011 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1012 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1013 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1014
1015 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1016
1017 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1018 }
1019
1020 // If the CondV is the output of a SETCC node which operates on GRLenVT
1021 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1022 // to take advantage of the integer compare+branch instructions. i.e.: (select
1023 // (setcc lhs, rhs, cc), truev, falsev)
1024 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1025 SDValue LHS = CondV.getOperand(0);
1026 SDValue RHS = CondV.getOperand(1);
1027 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1028
1029 // Special case for a select of 2 constants that have a difference of 1.
1030 // Normally this is done by DAGCombine, but if the select is introduced by
1031 // type legalization or op legalization, we miss it. Restricting to SETLT
1032 // case for now because that is what signed saturating add/sub need.
1033 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1034 // but we would probably want to swap the true/false values if the condition
1035 // is SETGE/SETLE to avoid an XORI.
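  // Illustrative example: (select (setlt a, b), 4, 3) becomes
  // (add (setlt a, b), 3), and (select (setlt a, b), 3, 4) becomes
  // (sub 4, (setlt a, b)).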
1036 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1037 CCVal == ISD::SETLT) {
1038 const APInt &TrueVal = TrueV->getAsAPIntVal();
1039 const APInt &FalseVal = FalseV->getAsAPIntVal();
1040 if (TrueVal - 1 == FalseVal)
1041 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1042 if (TrueVal + 1 == FalseVal)
1043 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1044 }
1045
1046 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1047 // 1 < x ? x : 1 -> 0 < x ? x : 1
1048 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1049 RHS == TrueV && LHS == FalseV) {
1050 LHS = DAG.getConstant(0, DL, VT);
1051 // 0 <u x is the same as x != 0.
1052 if (CCVal == ISD::SETULT) {
1053 std::swap(LHS, RHS);
1054 CCVal = ISD::SETNE;
1055 }
1056 }
1057
1058 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1059 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1060 RHS == FalseV) {
1061 RHS = DAG.getConstant(0, DL, VT);
1062 }
1063
1064 SDValue TargetCC = DAG.getCondCode(CCVal);
1065
1066 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1067 // (select (setcc lhs, rhs, CC), constant, falsev)
1068 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1069 std::swap(TrueV, FalseV);
1070 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1071 }
1072
1073 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1074 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1075}
1076
1077SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1078 SelectionDAG &DAG) const {
1079 SDValue CondV = Op.getOperand(1);
1080 SDLoc DL(Op);
1081 MVT GRLenVT = Subtarget.getGRLenVT();
1082
1083 if (CondV.getOpcode() == ISD::SETCC) {
1084 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1085 SDValue LHS = CondV.getOperand(0);
1086 SDValue RHS = CondV.getOperand(1);
1087 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1088
1089 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1090
1091 SDValue TargetCC = DAG.getCondCode(CCVal);
1092 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1093 Op.getOperand(0), LHS, RHS, TargetCC,
1094 Op.getOperand(2));
1095 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1096 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1097 Op.getOperand(0), CondV, Op.getOperand(2));
1098 }
1099 }
1100
1101 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1102 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1103 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1104}
1105
1106SDValue
1107LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1108 SelectionDAG &DAG) const {
1109 SDLoc DL(Op);
1110 MVT OpVT = Op.getSimpleValueType();
1111
1112 SDValue Vector = DAG.getUNDEF(OpVT);
1113 SDValue Val = Op.getOperand(0);
1114 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1115
1116 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1117}
1118
1119SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1120 SelectionDAG &DAG) const {
1121 EVT ResTy = Op->getValueType(0);
1122 SDValue Src = Op->getOperand(0);
1123 SDLoc DL(Op);
1124
1125 // LoongArchISD::BITREV_8B is not supported on LA32.
1126 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1127 return SDValue();
1128
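  // In outline: bitcast the source to <N x i64>, bit-reverse each 64-bit
  // element (BITREV_8B for byte-element vectors, full BITREVERSE otherwise),
  // rebuild the vector, and for 16- and 32-bit element types undo the
  // element-order reversal within each 64-bit chunk with a shuffle.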
1129 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1130 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1131 unsigned int NewEltNum = NewVT.getVectorNumElements();
1132
1133 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1134
1136 for (unsigned int i = 0; i < NewEltNum; i++) {
1137 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1138 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1139 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1140 ? (unsigned)LoongArchISD::BITREV_8B
1141 : (unsigned)ISD::BITREVERSE;
1142 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1143 }
1144 SDValue Res =
1145 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1146
1147 switch (ResTy.getSimpleVT().SimpleTy) {
1148 default:
1149 return SDValue();
1150 case MVT::v16i8:
1151 case MVT::v32i8:
1152 return Res;
1153 case MVT::v8i16:
1154 case MVT::v16i16:
1155 case MVT::v4i32:
1156 case MVT::v8i32: {
1158 for (unsigned int i = 0; i < NewEltNum; i++)
1159 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1160 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1161 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1162 }
1163 }
1164}
1165
1166// Widen element type to get a new mask value (if possible).
1167// For example:
1168// shufflevector <4 x i32> %a, <4 x i32> %b,
1169// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1170// is equivalent to:
1171// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1172// can be lowered to:
1173// VPACKOD_D vr0, vr0, vr1
1175 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1176 unsigned EltBits = VT.getScalarSizeInBits();
1177
1178 if (EltBits > 32 || EltBits == 1)
1179 return SDValue();
1180
1181 SmallVector<int, 8> NewMask;
1182 if (widenShuffleMaskElts(Mask, NewMask)) {
1183 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1184 : MVT::getIntegerVT(EltBits * 2);
1185 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1186 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1187 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1188 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1189 return DAG.getBitcast(
1190 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1191 }
1192 }
1193
1194 return SDValue();
1195}
1196
1197/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1198/// instruction.
1199// The function matches elements from one of the input vectors shuffled to the
1200// left or right with zeroable elements 'shifted in'. It handles both the
1201// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1202// lane.
1203// Mostly copied from X86.
1204static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1205 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1206 int MaskOffset, const APInt &Zeroable) {
1207 int Size = Mask.size();
1208 unsigned SizeInBits = Size * ScalarSizeInBits;
1209
1210 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1211 for (int i = 0; i < Size; i += Scale)
1212 for (int j = 0; j < Shift; ++j)
1213 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1214 return false;
1215
1216 return true;
1217 };
1218
1219 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1220 int Step = 1) {
1221 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1222 if (!(Mask[i] == -1 || Mask[i] == Low))
1223 return false;
1224 return true;
1225 };
1226
1227 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1228 for (int i = 0; i != Size; i += Scale) {
1229 unsigned Pos = Left ? i + Shift : i;
1230 unsigned Low = Left ? i : i + Shift;
1231 unsigned Len = Scale - Shift;
1232 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1233 return -1;
1234 }
1235
1236 int ShiftEltBits = ScalarSizeInBits * Scale;
1237 bool ByteShift = ShiftEltBits > 64;
1238 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1239 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1240 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1241
1242 // Normalize the scale for byte shifts to still produce an i64 element
1243 // type.
1244 Scale = ByteShift ? Scale / 2 : Scale;
1245
1246 // We need to round trip through the appropriate type for the shift.
1247 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1248 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1249 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1250 return (int)ShiftAmt;
1251 };
1252
1253 unsigned MaxWidth = 128;
1254 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1255 for (int Shift = 1; Shift != Scale; ++Shift)
1256 for (bool Left : {true, false})
1257 if (CheckZeros(Shift, Scale, Left)) {
1258 int ShiftAmt = MatchShift(Shift, Scale, Left);
1259 if (0 < ShiftAmt)
1260 return ShiftAmt;
1261 }
1262
1263 // no match
1264 return -1;
1265}
1266
1267/// Lower VECTOR_SHUFFLE as shift (if possible).
1268///
1269/// For example:
1270/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1271/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1272/// is lowered to:
1273/// (VBSLL_V $v0, $v0, 4)
1274///
1275/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1276/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1277/// is lowered to:
1278/// (VSLLI_D $v0, $v0, 32)
1280 MVT VT, SDValue V1, SDValue V2,
1281 SelectionDAG &DAG,
1282 const LoongArchSubtarget &Subtarget,
1283 const APInt &Zeroable) {
1284 int Size = Mask.size();
1285 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1286
1287 MVT ShiftVT;
1288 SDValue V = V1;
1289 unsigned Opcode;
1290
1291 // Try to match shuffle against V1 shift.
1292 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1293 Mask, 0, Zeroable);
1294
1295 // If V1 failed, try to match shuffle against V2 shift.
1296 if (ShiftAmt < 0) {
1297 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1298 Mask, Size, Zeroable);
1299 V = V2;
1300 }
1301
1302 if (ShiftAmt < 0)
1303 return SDValue();
1304
1305 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1306 "Illegal integer vector type");
1307 V = DAG.getBitcast(ShiftVT, V);
1308 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1309 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1310 return DAG.getBitcast(VT, V);
1311}
1312
1313/// Determine whether a range fits a regular pattern of values.
1314/// This function accounts for the possibility of jumping over the End iterator.
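/// For example, fitsRegularPattern<int>(Begin, 2, End, 0, 2) checks only the
/// elements at positions 0, 2, 4, ... and accepts masks such as
/// <0, X, 2, X, 4, X, ...> where X may be -1 (undef).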
1315template <typename ValType>
1316static bool
1318 unsigned CheckStride,
1320 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1321 auto &I = Begin;
1322
1323 while (I != End) {
1324 if (*I != -1 && *I != ExpectedIndex)
1325 return false;
1326 ExpectedIndex += ExpectedIndexStride;
1327
1328 // Incrementing past End is undefined behaviour so we must increment one
1329 // step at a time and check for End at each step.
1330 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1331 ; // Empty loop body.
1332 }
1333 return true;
1334}
1335
1336/// Compute whether each element of a shuffle is zeroable.
1337///
1338/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1340 SDValue V2, APInt &KnownUndef,
1341 APInt &KnownZero) {
1342 int Size = Mask.size();
1343 KnownUndef = KnownZero = APInt::getZero(Size);
1344
1345 V1 = peekThroughBitcasts(V1);
1346 V2 = peekThroughBitcasts(V2);
1347
1348 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1349 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1350
1351 int VectorSizeInBits = V1.getValueSizeInBits();
1352 int ScalarSizeInBits = VectorSizeInBits / Size;
1353 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1354 (void)ScalarSizeInBits;
1355
1356 for (int i = 0; i < Size; ++i) {
1357 int M = Mask[i];
1358 if (M < 0) {
1359 KnownUndef.setBit(i);
1360 continue;
1361 }
1362 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1363 KnownZero.setBit(i);
1364 continue;
1365 }
1366 }
1367}
1368
1369/// Test whether a shuffle mask is equivalent within each sub-lane.
1370///
1371/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1372/// non-trivial to compute in the face of undef lanes. The representation is
1373/// suitable for use with existing 128-bit shuffles as entries from the second
1374/// vector have been remapped to [LaneSize, 2*LaneSize).
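/// For example, for v8i32 with 128-bit lanes (4 elements per lane), the mask
/// <1, 0, 3, 2, 5, 4, 7, 6> repeats per lane and yields
/// RepeatedMask = <1, 0, 3, 2>.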
1375static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1376 ArrayRef<int> Mask,
1377 SmallVectorImpl<int> &RepeatedMask) {
1378 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1379 RepeatedMask.assign(LaneSize, -1);
1380 int Size = Mask.size();
1381 for (int i = 0; i < Size; ++i) {
1382 assert(Mask[i] == -1 || Mask[i] >= 0);
1383 if (Mask[i] < 0)
1384 continue;
1385 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1386 // This entry crosses lanes, so there is no way to model this shuffle.
1387 return false;
1388
1389 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1390 // Adjust second vector indices to start at LaneSize instead of Size.
1391 int LocalM =
1392 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1393 if (RepeatedMask[i % LaneSize] < 0)
1394 // This is the first non-undef entry in this slot of a 128-bit lane.
1395 RepeatedMask[i % LaneSize] = LocalM;
1396 else if (RepeatedMask[i % LaneSize] != LocalM)
1397 // Found a mismatch with the repeated mask.
1398 return false;
1399 }
1400 return true;
1401}
1402
1403/// Attempts to match vector shuffle as byte rotation.
1405 ArrayRef<int> Mask) {
1406
1407 SDValue Lo, Hi;
1408 SmallVector<int, 16> RepeatedMask;
1409
1410 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1411 return -1;
1412
1413 int NumElts = RepeatedMask.size();
1414 int Rotation = 0;
1415 int Scale = 16 / NumElts;
1416
1417 for (int i = 0; i < NumElts; ++i) {
1418 int M = RepeatedMask[i];
1419 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1420 "Unexpected mask index.");
1421 if (M < 0)
1422 continue;
1423
1424 // Determine where a rotated vector would have started.
1425 int StartIdx = i - (M % NumElts);
1426 if (StartIdx == 0)
1427 return -1;
1428
1429 // If we found the tail of a vector the rotation must be the missing
1430 // front. If we found the head of a vector, it must be how much of the
1431 // head.
1432 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1433
1434 if (Rotation == 0)
1435 Rotation = CandidateRotation;
1436 else if (Rotation != CandidateRotation)
1437 return -1;
1438
1439 // Compute which value this mask is pointing at.
1440 SDValue MaskV = M < NumElts ? V1 : V2;
1441
1442 // Compute which of the two target values this index should be assigned
1443 // to. This reflects whether the high elements are remaining or the low
1444 // elements are remaining.
1445 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1446
1447 // Either set up this value if we've not encountered it before, or check
1448 // that it remains consistent.
1449 if (!TargetV)
1450 TargetV = MaskV;
1451 else if (TargetV != MaskV)
1452 return -1;
1453 }
1454
1455 // Check that we successfully analyzed the mask, and normalize the results.
1456 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1457 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1458 if (!Lo)
1459 Lo = Hi;
1460 else if (!Hi)
1461 Hi = Lo;
1462
1463 V1 = Lo;
1464 V2 = Hi;
1465
1466 return Rotation * Scale;
1467}
1468
1469/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1470///
1471/// For example:
1472/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1473/// <2 x i32> <i32 3, i32 0>
1474/// is lowered to:
1475/// (VBSRL_V $v1, $v1, 8)
1476/// (VBSLL_V $v0, $v0, 8)
1477/// (VOR_V $v0, $V0, $v1)
1478static SDValue
1480 SDValue V1, SDValue V2, SelectionDAG &DAG,
1481 const LoongArchSubtarget &Subtarget) {
1482
1483 SDValue Lo = V1, Hi = V2;
1484 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1485 if (ByteRotation <= 0)
1486 return SDValue();
1487
1488 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1489 Lo = DAG.getBitcast(ByteVT, Lo);
1490 Hi = DAG.getBitcast(ByteVT, Hi);
1491
1492 int LoByteShift = 16 - ByteRotation;
1493 int HiByteShift = ByteRotation;
1494 MVT GRLenVT = Subtarget.getGRLenVT();
1495
1496 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1497 DAG.getConstant(LoByteShift, DL, GRLenVT));
1498 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1499 DAG.getConstant(HiByteShift, DL, GRLenVT));
1500 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1501}
1502
1503/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1504///
1505/// For example:
1506/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1507/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1508/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1509/// is lowered to:
1510/// (VREPLI $v1, 0)
1511/// (VILVL $v0, $v1, $v0)
1513 ArrayRef<int> Mask, MVT VT,
1514 SDValue V1, SDValue V2,
1515 SelectionDAG &DAG,
1516 const APInt &Zeroable) {
1517 int Bits = VT.getSizeInBits();
1518 int EltBits = VT.getScalarSizeInBits();
1519 int NumElements = VT.getVectorNumElements();
1520
1521 if (Zeroable.isAllOnes())
1522 return DAG.getConstant(0, DL, VT);
1523
1524 // Define a helper function to check a particular ext-scale and lower to it if
1525 // valid.
1526 auto Lower = [&](int Scale) -> SDValue {
1527 SDValue InputV;
1528 bool AnyExt = true;
1529 int Offset = 0;
1530 for (int i = 0; i < NumElements; i++) {
1531 int M = Mask[i];
1532 if (M < 0)
1533 continue;
1534 if (i % Scale != 0) {
1535 // Each of the extended elements need to be zeroable.
1536 if (!Zeroable[i])
1537 return SDValue();
1538
1539 AnyExt = false;
1540 continue;
1541 }
1542
1543 // Each of the base elements needs to be consecutive indices into the
1544 // same input vector.
1545 SDValue V = M < NumElements ? V1 : V2;
1546 M = M % NumElements;
1547 if (!InputV) {
1548 InputV = V;
1549 Offset = M - (i / Scale);
1550
1551 // These offsets can't be handled.
1552 if (Offset % (NumElements / Scale))
1553 return SDValue();
1554 } else if (InputV != V)
1555 return SDValue();
1556
1557 if (M != (Offset + (i / Scale)))
1558 return SDValue(); // Non-consecutive strided elements.
1559 }
1560
1561 // If we fail to find an input, we have a zero-shuffle which should always
1562 // have already been handled.
1563 if (!InputV)
1564 return SDValue();
1565
1566 do {
1567 unsigned VilVLoHi = LoongArchISD::VILVL;
1568 if (Offset >= (NumElements / 2)) {
1569 VilVLoHi = LoongArchISD::VILVH;
1570 Offset -= (NumElements / 2);
1571 }
1572
1573 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1574 SDValue Ext =
1575 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1576 InputV = DAG.getBitcast(InputVT, InputV);
1577 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1578 Scale /= 2;
1579 EltBits *= 2;
1580 NumElements /= 2;
1581 } while (Scale > 1);
1582 return DAG.getBitcast(VT, InputV);
1583 };
1584
1585 // Each iteration, try extending the elements half as much, but into twice as
1586 // many elements.
1587 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1588 NumExtElements *= 2) {
1589 if (SDValue V = Lower(NumElements / NumExtElements))
1590 return V;
1591 }
1592 return SDValue();
1593}
1594
1595/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1596///
1597/// VREPLVEI performs vector broadcast based on an element specified by an
1598/// integer immediate, with its mask being similar to:
1599/// <x, x, x, ...>
1600/// where x is any valid index.
1601///
1602/// When undef's appear in the mask they are treated as if they were whatever
1603/// value is necessary in order to fit the above form.
1604static SDValue
1606 SDValue V1, SelectionDAG &DAG,
1607 const LoongArchSubtarget &Subtarget) {
1608 int SplatIndex = -1;
1609 for (const auto &M : Mask) {
1610 if (M != -1) {
1611 SplatIndex = M;
1612 break;
1613 }
1614 }
1615
1616 if (SplatIndex == -1)
1617 return DAG.getUNDEF(VT);
1618
1619 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1620 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1621 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1622 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1623 }
1624
1625 return SDValue();
1626}
1627
1628/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1629///
1630/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1631/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1632///
1633/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1634/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1635/// When undef's appear they are treated as if they were whatever value is
1636/// necessary in order to fit the above forms.
1637///
1638/// For example:
1639/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1640/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1641/// i32 7, i32 6, i32 5, i32 4>
1642/// is lowered to:
1643/// (VSHUF4I_H $v0, $v1, 27)
1644/// where the 27 comes from:
1645/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1646static SDValue
1648 SDValue V1, SDValue V2, SelectionDAG &DAG,
1649 const LoongArchSubtarget &Subtarget) {
1650
1651 unsigned SubVecSize = 4;
1652 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1653 SubVecSize = 2;
1654
1655 int SubMask[4] = {-1, -1, -1, -1};
1656 for (unsigned i = 0; i < SubVecSize; ++i) {
1657 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1658 int M = Mask[j];
1659
1660 // Convert from vector index to 4-element subvector index
1661 // If an index refers to an element outside of the subvector then give up
1662 if (M != -1) {
1663 M -= 4 * (j / SubVecSize);
1664 if (M < 0 || M >= 4)
1665 return SDValue();
1666 }
1667
1668 // If the mask has an undef, replace it with the current index.
1669 // Note that it might still be undef if the current index is also undef
1670 if (SubMask[i] == -1)
1671 SubMask[i] = M;
1672 // Check that non-undef values are the same as in the mask. If they
1673 // aren't then give up
1674 else if (M != -1 && M != SubMask[i])
1675 return SDValue();
1676 }
1677 }
1678
1679 // Calculate the immediate. Replace any remaining undefs with zero
1680 int Imm = 0;
1681 for (int i = SubVecSize - 1; i >= 0; --i) {
1682 int M = SubMask[i];
1683
1684 if (M == -1)
1685 M = 0;
1686
1687 Imm <<= 2;
1688 Imm |= M & 0x3;
1689 }
1690
1691 MVT GRLenVT = Subtarget.getGRLenVT();
1692
1693 // Return vshuf4i.d
1694 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1695 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1696 DAG.getConstant(Imm, DL, GRLenVT));
1697
1698 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1699 DAG.getConstant(Imm, DL, GRLenVT));
1700}
1701
1702/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1703///
1704/// VPACKEV interleaves the even elements from each vector.
1705///
1706/// It is possible to lower into VPACKEV when the mask consists of two of the
1707/// following forms interleaved:
1708/// <0, 2, 4, ...>
1709/// <n, n+2, n+4, ...>
1710/// where n is the number of elements in the vector.
1711/// For example:
1712/// <0, 0, 2, 2, 4, 4, ...>
1713/// <0, n, 2, n+2, 4, n+4, ...>
1714///
1715/// When undef's appear in the mask they are treated as if they were whatever
1716/// value is necessary in order to fit the above forms.
1718 MVT VT, SDValue V1, SDValue V2,
1719 SelectionDAG &DAG) {
1720
1721 const auto &Begin = Mask.begin();
1722 const auto &End = Mask.end();
1723 SDValue OriV1 = V1, OriV2 = V2;
1724
1725 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1726 V1 = OriV1;
1727 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1728 V1 = OriV2;
1729 else
1730 return SDValue();
1731
1732 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1733 V2 = OriV1;
1734 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1735 V2 = OriV2;
1736 else
1737 return SDValue();
1738
1739 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1740}
1741
1742/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1743///
1744/// VPACKOD interleaves the odd elements from each vector.
1745///
1746/// It is possible to lower into VPACKOD when the mask consists of two of the
1747/// following forms interleaved:
1748/// <1, 3, 5, ...>
1749/// <n+1, n+3, n+5, ...>
1750/// where n is the number of elements in the vector.
1751/// For example:
1752/// <1, 1, 3, 3, 5, 5, ...>
1753/// <1, n+1, 3, n+3, 5, n+5, ...>
1754///
1755/// When undef's appear in the mask they are treated as if they were whatever
1756/// value is necessary in order to fit the above forms.
1758 MVT VT, SDValue V1, SDValue V2,
1759 SelectionDAG &DAG) {
1760
1761 const auto &Begin = Mask.begin();
1762 const auto &End = Mask.end();
1763 SDValue OriV1 = V1, OriV2 = V2;
1764
1765 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1766 V1 = OriV1;
1767 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1768 V1 = OriV2;
1769 else
1770 return SDValue();
1771
1772 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1773 V2 = OriV1;
1774 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1775 V2 = OriV2;
1776 else
1777 return SDValue();
1778
1779 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1780}
1781
1782/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1783///
1784/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1785/// of each vector.
1786///
1787/// It is possible to lower into VILVH when the mask consists of two of the
1788/// following forms interleaved:
1789/// <x, x+1, x+2, ...>
1790/// <n+x, n+x+1, n+x+2, ...>
1791/// where n is the number of elements in the vector and x is half n.
1792/// For example:
1793/// <x, x, x+1, x+1, x+2, x+2, ...>
1794/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1795///
1796/// When undef's appear in the mask they are treated as if they were whatever
1797/// value is necessary in order to fit the above forms.
1799 MVT VT, SDValue V1, SDValue V2,
1800 SelectionDAG &DAG) {
1801
1802 const auto &Begin = Mask.begin();
1803 const auto &End = Mask.end();
1804 unsigned HalfSize = Mask.size() / 2;
1805 SDValue OriV1 = V1, OriV2 = V2;
1806
1807 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1808 V1 = OriV1;
1809 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1810 V1 = OriV2;
1811 else
1812 return SDValue();
1813
1814 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1815 V2 = OriV1;
1816 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1817 1))
1818 V2 = OriV2;
1819 else
1820 return SDValue();
1821
1822 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1823}
1824
1825/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1826///
1827/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1828/// of each vector.
1829///
1830/// It is possible to lower into VILVL when the mask consists of two of the
1831/// following forms interleaved:
1832/// <0, 1, 2, ...>
1833/// <n, n+1, n+2, ...>
1834/// where n is the number of elements in the vector.
1835/// For example:
1836/// <0, 0, 1, 1, 2, 2, ...>
1837/// <0, n, 1, n+1, 2, n+2, ...>
1838///
1839/// When undef's appear in the mask they are treated as if they were whatever
1840/// value is necessary in order to fit the above forms.
1842 MVT VT, SDValue V1, SDValue V2,
1843 SelectionDAG &DAG) {
1844
1845 const auto &Begin = Mask.begin();
1846 const auto &End = Mask.end();
1847 SDValue OriV1 = V1, OriV2 = V2;
1848
1849 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1850 V1 = OriV1;
1851 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1852 V1 = OriV2;
1853 else
1854 return SDValue();
1855
1856 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1857 V2 = OriV1;
1858 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1859 V2 = OriV2;
1860 else
1861 return SDValue();
1862
1863 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1864}
1865
1866/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1867///
1868/// VPICKEV copies the even elements of each vector into the result vector.
1869///
1870/// It is possible to lower into VPICKEV when the mask consists of two of the
1871/// following forms concatenated:
1872/// <0, 2, 4, ...>
1873/// <n, n+2, n+4, ...>
1874/// where n is the number of elements in the vector.
1875/// For example:
1876/// <0, 2, 4, ..., 0, 2, 4, ...>
1877/// <0, 2, 4, ..., n, n+2, n+4, ...>
1878///
1879/// When undef's appear in the mask they are treated as if they were whatever
1880/// value is necessary in order to fit the above forms.
1881 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1882 MVT VT, SDValue V1, SDValue V2,
1883 SelectionDAG &DAG) {
1884
1885 const auto &Begin = Mask.begin();
1886 const auto &Mid = Mask.begin() + Mask.size() / 2;
1887 const auto &End = Mask.end();
1888 SDValue OriV1 = V1, OriV2 = V2;
1889
1890 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1891 V1 = OriV1;
1892 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1893 V1 = OriV2;
1894 else
1895 return SDValue();
1896
1897 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1898 V2 = OriV1;
1899 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1900 V2 = OriV2;
1901
1902 else
1903 return SDValue();
1904
1905 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1906}
1907
1908/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1909///
1910/// VPICKOD copies the odd elements of each vector into the result vector.
1911///
1912/// It is possible to lower into VPICKOD when the mask consists of two of the
1913/// following forms concatenated:
1914/// <1, 3, 5, ...>
1915/// <n+1, n+3, n+5, ...>
1916/// where n is the number of elements in the vector.
1917/// For example:
1918/// <1, 3, 5, ..., 1, 3, 5, ...>
1919/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1920///
1921/// When undef's appear in the mask they are treated as if they were whatever
1922/// value is necessary in order to fit the above forms.
1923 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1924 MVT VT, SDValue V1, SDValue V2,
1925 SelectionDAG &DAG) {
1926
1927 const auto &Begin = Mask.begin();
1928 const auto &Mid = Mask.begin() + Mask.size() / 2;
1929 const auto &End = Mask.end();
1930 SDValue OriV1 = V1, OriV2 = V2;
1931
1932 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1933 V1 = OriV1;
1934 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1935 V1 = OriV2;
1936 else
1937 return SDValue();
1938
1939 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1940 V2 = OriV1;
1941 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1942 V2 = OriV2;
1943 else
1944 return SDValue();
1945
1946 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1947}
1948
1949/// Lower VECTOR_SHUFFLE into VSHUF.
1950///
1951/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1952/// adding it as an operand to the resulting VSHUF.
1953 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1954 MVT VT, SDValue V1, SDValue V2,
1955 SelectionDAG &DAG,
1956 const LoongArchSubtarget &Subtarget) {
1957
1958 SmallVector<SDValue, 16> Ops;
1959 for (auto M : Mask)
1960 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
1961
1962 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1963 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1964
1965 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1966 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1967 // VSHF concatenates the vectors in a bitwise fashion:
1968 // <0b00, 0b01> + <0b10, 0b11> ->
1969 // 0b0100 + 0b1110 -> 0b01001110
1970 // <0b10, 0b11, 0b00, 0b01>
1971 // We must therefore swap the operands to get the correct result.
1972 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1973}
1974
1975/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1976///
1977/// This routine breaks down the specific type of 128-bit shuffle and
1978/// dispatches to the lowering routines accordingly.
1979 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1980 SDValue V1, SDValue V2, SelectionDAG &DAG,
1981 const LoongArchSubtarget &Subtarget) {
1982 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1983 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1984 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1985 "Vector type is unsupported for lsx!");
1986 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1987 "Two operands have different types!");
1988 assert(VT.getVectorNumElements() == Mask.size() &&
1989 "Unexpected mask size for shuffle!");
1990 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1991
1992 APInt KnownUndef, KnownZero;
1993 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1994 APInt Zeroable = KnownUndef | KnownZero;
1995
1996 SDValue Result;
1997 // TODO: Add more comparison patterns.
1998 if (V2.isUndef()) {
1999 if ((Result =
2000 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2001 return Result;
2002 if ((Result =
2003 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2004 return Result;
2005
2006 // TODO: This comment may be enabled in the future to better match the
2007 // pattern for instruction selection.
2008 /* V2 = V1; */
2009 }
2010
2011 // It is recommended not to change the pattern comparison order for better
2012 // performance.
2013 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2014 return Result;
2015 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2016 return Result;
2017 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2018 return Result;
2019 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2020 return Result;
2021 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2022 return Result;
2023 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2024 return Result;
2025 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2026 (Result =
2027 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2028 return Result;
2029 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2030 Zeroable)))
2031 return Result;
2032 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2033 Zeroable)))
2034 return Result;
2035 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2036 Subtarget)))
2037 return Result;
2038 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2039 return NewShuffle;
2040 if ((Result =
2041 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2042 return Result;
2043 return SDValue();
2044}
2045
2046/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2047///
2048 /// It is an XVREPLVEI when the mask is:
2049 /// <x, x, x, ..., x+n, x+n, x+n, ...>
2050 /// where the number of x's is equal to n and n is half the length of the vector.
2051///
2052/// When undef's appear in the mask they are treated as if they were whatever
2053/// value is necessary in order to fit the above form.
2054static SDValue
2055 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2056 SDValue V1, SelectionDAG &DAG,
2057 const LoongArchSubtarget &Subtarget) {
2058 int SplatIndex = -1;
2059 for (const auto &M : Mask) {
2060 if (M != -1) {
2061 SplatIndex = M;
2062 break;
2063 }
2064 }
2065
2066 if (SplatIndex == -1)
2067 return DAG.getUNDEF(VT);
2068
2069 const auto &Begin = Mask.begin();
2070 const auto &End = Mask.end();
2071 int HalfSize = Mask.size() / 2;
2072
2073 if (SplatIndex >= HalfSize)
2074 return SDValue();
2075
2076 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2077 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2078 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2079 0)) {
2080 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2081 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2082 }
2083
2084 return SDValue();
2085}
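// A concrete instance of the form above (assuming a v8i32 shuffle, so
// HalfSize = 4): the mask <1, 1, 1, 1, 5, 5, 5, 5> splats element 1 within
// the low half and element 1 + HalfSize within the high half, so the routine
// emits (VREPLVEI V1, 1). A mask such as <5, 5, 5, 5, 1, 1, 1, 1> fails the
// SplatIndex < HalfSize check and falls through to the other lowerings.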
2086
2087/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2088static SDValue
2089 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2090 SDValue V1, SDValue V2, SelectionDAG &DAG,
2091 const LoongArchSubtarget &Subtarget) {
2092 // When the size is less than or equal to 4, lower-cost instructions may be
2093 // used.
2094 if (Mask.size() <= 4)
2095 return SDValue();
2096 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2097}
2098
2099/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2100static SDValue
2101 lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2102 SDValue V1, SelectionDAG &DAG,
2103 const LoongArchSubtarget &Subtarget) {
2104 // Only consider XVPERMI_D.
2105 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2106 return SDValue();
2107
2108 unsigned MaskImm = 0;
2109 for (unsigned i = 0; i < Mask.size(); ++i) {
2110 if (Mask[i] == -1)
2111 continue;
2112 MaskImm |= Mask[i] << (i * 2);
2113 }
2114
2115 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2116 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2117}
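// Immediate encoding sketch for the loop above (assuming a v4i64 shuffle):
// the mask <1, 0, 3, 2> packs each index into two bits of MaskImm, giving
// 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xB1, so the emitted node is
// (XVPERMI V1, 0xB1); undef (-1) entries simply leave their two-bit field as
// zero.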
2118
2119/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2120 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2121 MVT VT, SDValue V1, SelectionDAG &DAG,
2122 const LoongArchSubtarget &Subtarget) {
2123 // LoongArch LASX only have XVPERM_W.
2124 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2125 return SDValue();
2126
2127 unsigned NumElts = VT.getVectorNumElements();
2128 unsigned HalfSize = NumElts / 2;
2129 bool FrontLo = true, FrontHi = true;
2130 bool BackLo = true, BackHi = true;
2131
2132 auto inRange = [](int val, int low, int high) {
2133 return (val == -1) || (val >= low && val < high);
2134 };
2135
2136 for (unsigned i = 0; i < HalfSize; ++i) {
2137 int Fronti = Mask[i];
2138 int Backi = Mask[i + HalfSize];
2139
2140 FrontLo &= inRange(Fronti, 0, HalfSize);
2141 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2142 BackLo &= inRange(Backi, 0, HalfSize);
2143 BackHi &= inRange(Backi, HalfSize, NumElts);
2144 }
2145
2146 // If both the lower and upper 128-bit parts access only one half of the
2147 // vector (either lower or upper), avoid using xvperm.w. The latency of
2148 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2149 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2150 return SDValue();
2151
2152 SmallVector<SDValue, 8> Masks;
2153 MVT GRLenVT = Subtarget.getGRLenVT();
2154 for (unsigned i = 0; i < NumElts; ++i)
2155 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2156 : DAG.getConstant(Mask[i], DL, GRLenVT));
2157 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2158
2159 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2160}
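// How the heuristic above plays out (assuming v8i32): for a mask such as
// <7, 6, 5, 4, 3, 2, 1, 0>, the first four elements read only the high half
// and the last four read only the low half, so the routine bails out and the
// shuffle is left to the cheaper xvshuf-based paths. A mask such as
// <0, 4, 1, 5, 2, 6, 3, 7> mixes both halves within each group, so the mask
// is materialized as a v8i32 build_vector and an XVPERM node is emitted.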
2161
2162/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2163 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2164 MVT VT, SDValue V1, SDValue V2,
2165 SelectionDAG &DAG) {
2166 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2167}
2168
2169/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2170 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2171 MVT VT, SDValue V1, SDValue V2,
2172 SelectionDAG &DAG) {
2173 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2174}
2175
2176/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2177 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2178 MVT VT, SDValue V1, SDValue V2,
2179 SelectionDAG &DAG) {
2180
2181 const auto &Begin = Mask.begin();
2182 const auto &End = Mask.end();
2183 unsigned HalfSize = Mask.size() / 2;
2184 unsigned LeftSize = HalfSize / 2;
2185 SDValue OriV1 = V1, OriV2 = V2;
2186
2187 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2188 1) &&
2189 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2190 V1 = OriV1;
2191 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2192 Mask.size() + HalfSize - LeftSize, 1) &&
2193 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2194 Mask.size() + HalfSize + LeftSize, 1))
2195 V1 = OriV2;
2196 else
2197 return SDValue();
2198
2199 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2200 1) &&
2201 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2202 1))
2203 V2 = OriV1;
2204 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2205 Mask.size() + HalfSize - LeftSize, 1) &&
2206 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2207 Mask.size() + HalfSize + LeftSize, 1))
2208 V2 = OriV2;
2209 else
2210 return SDValue();
2211
2212 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2213}
2214
2215/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2216 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2217 MVT VT, SDValue V1, SDValue V2,
2218 SelectionDAG &DAG) {
2219
2220 const auto &Begin = Mask.begin();
2221 const auto &End = Mask.end();
2222 unsigned HalfSize = Mask.size() / 2;
2223 SDValue OriV1 = V1, OriV2 = V2;
2224
2225 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2226 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2227 V1 = OriV1;
2228 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2229 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2230 Mask.size() + HalfSize, 1))
2231 V1 = OriV2;
2232 else
2233 return SDValue();
2234
2235 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2236 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2237 V2 = OriV1;
2238 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2239 1) &&
2240 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2241 Mask.size() + HalfSize, 1))
2242 V2 = OriV2;
2243 else
2244 return SDValue();
2245
2246 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2247}
2248
2249/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2250 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2251 MVT VT, SDValue V1, SDValue V2,
2252 SelectionDAG &DAG) {
2253
2254 const auto &Begin = Mask.begin();
2255 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2256 const auto &Mid = Mask.begin() + Mask.size() / 2;
2257 const auto &RightMid = Mask.end() - Mask.size() / 4;
2258 const auto &End = Mask.end();
2259 unsigned HalfSize = Mask.size() / 2;
2260 SDValue OriV1 = V1, OriV2 = V2;
2261
2262 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2263 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2264 V1 = OriV1;
2265 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2266 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2267 V1 = OriV2;
2268 else
2269 return SDValue();
2270
2271 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2272 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2273 V2 = OriV1;
2274 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2275 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2276 V2 = OriV2;
2277
2278 else
2279 return SDValue();
2280
2281 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2282}
2283
2284/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2285 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2286 MVT VT, SDValue V1, SDValue V2,
2287 SelectionDAG &DAG) {
2288
2289 const auto &Begin = Mask.begin();
2290 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2291 const auto &Mid = Mask.begin() + Mask.size() / 2;
2292 const auto &RightMid = Mask.end() - Mask.size() / 4;
2293 const auto &End = Mask.end();
2294 unsigned HalfSize = Mask.size() / 2;
2295 SDValue OriV1 = V1, OriV2 = V2;
2296
2297 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2298 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2299 V1 = OriV1;
2300 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2301 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2302 2))
2303 V1 = OriV2;
2304 else
2305 return SDValue();
2306
2307 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2308 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2309 V2 = OriV1;
2310 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2311 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2312 2))
2313 V2 = OriV2;
2314 else
2315 return SDValue();
2316
2317 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2318}
2319
2320/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2321 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2322 MVT VT, SDValue V1, SDValue V2,
2323 SelectionDAG &DAG) {
2324
2325 int MaskSize = Mask.size();
2326 int HalfSize = Mask.size() / 2;
2327 const auto &Begin = Mask.begin();
2328 const auto &Mid = Mask.begin() + HalfSize;
2329 const auto &End = Mask.end();
2330
2331 // VECTOR_SHUFFLE concatenates the vectors:
2332 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2333 // shuffling ->
2334 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2335 //
2336 // XVSHUF concatenates the vectors:
2337 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2338 // shuffling ->
2339 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2340 SmallVector<SDValue, 8> MaskAlloc;
2341 for (auto it = Begin; it < Mid; it++) {
2342 if (*it < 0) // UNDEF
2343 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2344 else if ((*it >= 0 && *it < HalfSize) ||
2345 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2346 int M = *it < HalfSize ? *it : *it - HalfSize;
2347 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2348 } else
2349 return SDValue();
2350 }
2351 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2352
2353 for (auto it = Mid; it < End; it++) {
2354 if (*it < 0) // UNDEF
2355 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2356 else if ((*it >= HalfSize && *it < MaskSize) ||
2357 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2358 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2359 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2360 } else
2361 return SDValue();
2362 }
2363 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2364
2365 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2366 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2367 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2368}
2369
2370/// Shuffle vectors by lane to generate more optimized instructions.
2371/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2372///
2373/// Therefore, except for the following four cases, other cases are regarded
2374/// as cross-lane shuffles, where optimization is relatively limited.
2375///
2376 /// - Shuffle high, low lanes of the two input vectors
2377 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2378 /// - Shuffle low, high lanes of the two input vectors
2379 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2380 /// - Shuffle low, low lanes of the two input vectors
2381 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2382 /// - Shuffle high, high lanes of the two input vectors
2383 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2384///
2385/// The first case is the closest to LoongArch instructions and the other
2386/// cases need to be converted to it for processing.
2387///
2388/// This function will return true for the last three cases above and will
2389/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2390/// cross-lane shuffle cases.
2391 static bool canonicalizeShuffleVectorByLane(
2392 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2393 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2394
2395 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2396
2397 int MaskSize = Mask.size();
2398 int HalfSize = Mask.size() / 2;
2399 MVT GRLenVT = Subtarget.getGRLenVT();
2400
2401 HalfMaskType preMask = None, postMask = None;
2402
2403 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2404 return M < 0 || (M >= 0 && M < HalfSize) ||
2405 (M >= MaskSize && M < MaskSize + HalfSize);
2406 }))
2407 preMask = HighLaneTy;
2408 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2409 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2410 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2411 }))
2412 preMask = LowLaneTy;
2413
2414 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2415 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2416 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2417 }))
2418 postMask = LowLaneTy;
2419 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2420 return M < 0 || (M >= 0 && M < HalfSize) ||
2421 (M >= MaskSize && M < MaskSize + HalfSize);
2422 }))
2423 postMask = HighLaneTy;
2424
2425 // The pre-half of mask is high lane type, and the post-half of mask
2426 // is low lane type, which is closest to the LoongArch instructions.
2427 //
2428 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2429 // to the lower 128 bits of the vector register, and the low lane of the mask
2430 // corresponds to the higher 128 bits of the vector register.
2431 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2432 return false;
2433 }
2434 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2435 V1 = DAG.getBitcast(MVT::v4i64, V1);
2436 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2437 DAG.getConstant(0b01001110, DL, GRLenVT));
2438 V1 = DAG.getBitcast(VT, V1);
2439
2440 if (!V2.isUndef()) {
2441 V2 = DAG.getBitcast(MVT::v4i64, V2);
2442 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2443 DAG.getConstant(0b01001110, DL, GRLenVT));
2444 V2 = DAG.getBitcast(VT, V2);
2445 }
2446
2447 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2448 *it = *it < 0 ? *it : *it - HalfSize;
2449 }
2450 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2451 *it = *it < 0 ? *it : *it + HalfSize;
2452 }
2453 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2454 V1 = DAG.getBitcast(MVT::v4i64, V1);
2455 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2456 DAG.getConstant(0b11101110, DL, GRLenVT));
2457 V1 = DAG.getBitcast(VT, V1);
2458
2459 if (!V2.isUndef()) {
2460 V2 = DAG.getBitcast(MVT::v4i64, V2);
2461 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2462 DAG.getConstant(0b11101110, DL, GRLenVT));
2463 V2 = DAG.getBitcast(VT, V2);
2464 }
2465
2466 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2467 *it = *it < 0 ? *it : *it - HalfSize;
2468 }
2469 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2470 V1 = DAG.getBitcast(MVT::v4i64, V1);
2471 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2472 DAG.getConstant(0b01000100, DL, GRLenVT));
2473 V1 = DAG.getBitcast(VT, V1);
2474
2475 if (!V2.isUndef()) {
2476 V2 = DAG.getBitcast(MVT::v4i64, V2);
2477 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2478 DAG.getConstant(0b01000100, DL, GRLenVT));
2479 V2 = DAG.getBitcast(VT, V2);
2480 }
2481
2482 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2483 *it = *it < 0 ? *it : *it + HalfSize;
2484 }
2485 } else { // cross-lane
2486 return false;
2487 }
2488
2489 return true;
2490}
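// Sketch of the "low, low" case above (assuming v8i32 and an undef V2): for
// the mask <5, 4, 7, 6, 6, 7, 4, 5>, both halves of the mask read only
// elements 4..7 of V1 (the LowLaneTy case), so V1 is rewritten as
// (XVPERMI V1, 0b11101110), which is expected to replicate its upper 128-bit
// half into both halves, and the first half of the mask is rebased to give
// <1, 0, 3, 2, 6, 7, 4, 5> before the caller re-dispatches the shuffle.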
2491
2492/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2493/// Only for 256-bit vector.
2494///
2495/// For example:
2496 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2497 /// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2498 /// is lowered to:
2499/// (XVPERMI $xr2, $xr0, 78)
2500/// (XVSHUF $xr1, $xr2, $xr0)
2501/// (XVORI $xr0, $xr1, 0)
2502 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2503 ArrayRef<int> Mask,
2504 MVT VT, SDValue V1,
2505 SDValue V2,
2506 SelectionDAG &DAG) {
2507 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2508 int Size = Mask.size();
2509 int LaneSize = Size / 2;
2510
2511 bool LaneCrossing[2] = {false, false};
2512 for (int i = 0; i < Size; ++i)
2513 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2514 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2515
2516 // Ensure that all lanes are involved.
2517 if (!LaneCrossing[0] && !LaneCrossing[1])
2518 return SDValue();
2519
2520 SmallVector<int> InLaneMask;
2521 InLaneMask.assign(Mask.begin(), Mask.end());
2522 for (int i = 0; i < Size; ++i) {
2523 int &M = InLaneMask[i];
2524 if (M < 0)
2525 continue;
2526 if (((M % Size) / LaneSize) != (i / LaneSize))
2527 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2528 }
2529
2530 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2531 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2532 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2533 Flipped = DAG.getBitcast(VT, Flipped);
2534 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2535}
2536
2537/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2538///
2539/// This routine breaks down the specific type of 256-bit shuffle and
2540/// dispatches to the lowering routines accordingly.
2541 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2542 SDValue V1, SDValue V2, SelectionDAG &DAG,
2543 const LoongArchSubtarget &Subtarget) {
2544 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2545 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2546 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2547 "Vector type is unsupported for lasx!");
2548 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2549 "Two operands have different types!");
2550 assert(VT.getVectorNumElements() == Mask.size() &&
2551 "Unexpected mask size for shuffle!");
2552 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2553 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2554
2555 APInt KnownUndef, KnownZero;
2556 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2557 APInt Zeroable = KnownUndef | KnownZero;
2558
2559 SDValue Result;
2560 // TODO: Add more comparison patterns.
2561 if (V2.isUndef()) {
2562 if ((Result =
2563 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2564 return Result;
2565 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2566 Subtarget)))
2567 return Result;
2568 if ((Result =
2569 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2570 return Result;
2571 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2572 return Result;
2573
2574 // TODO: This comment may be enabled in the future to better match the
2575 // pattern for instruction selection.
2576 /* V2 = V1; */
2577 }
2578
2579 // It is recommended not to change the pattern comparison order for better
2580 // performance.
2581 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2582 return Result;
2583 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2584 return Result;
2585 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2586 return Result;
2587 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2588 return Result;
2589 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2590 return Result;
2591 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2592 return Result;
2593 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2594 Zeroable)))
2595 return Result;
2596 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2597 Subtarget)))
2598 return Result;
2599
2600 // Canonicalize non-cross-lane shuffle vectors.
2601 SmallVector<int> NewMask(Mask);
2602 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2603 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2604
2605 // FIXME: Handling the remaining cases earlier can degrade performance
2606 // in some situations. Further analysis is required to enable more
2607 // effective optimizations.
2608 if (V2.isUndef()) {
2609 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2610 V1, V2, DAG)))
2611 return Result;
2612 }
2613
2614 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2615 return NewShuffle;
2616 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2617 return Result;
2618
2619 return SDValue();
2620}
2621
2622SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2623 SelectionDAG &DAG) const {
2624 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2625 ArrayRef<int> OrigMask = SVOp->getMask();
2626 SDValue V1 = Op.getOperand(0);
2627 SDValue V2 = Op.getOperand(1);
2628 MVT VT = Op.getSimpleValueType();
2629 int NumElements = VT.getVectorNumElements();
2630 SDLoc DL(Op);
2631
2632 bool V1IsUndef = V1.isUndef();
2633 bool V2IsUndef = V2.isUndef();
2634 if (V1IsUndef && V2IsUndef)
2635 return DAG.getUNDEF(VT);
2636
2637 // When we create a shuffle node we put the UNDEF node as the second operand,
2638 // but in some cases the first operand may be transformed to UNDEF.
2639 // In this case we should just commute the node.
2640 if (V1IsUndef)
2641 return DAG.getCommutedVectorShuffle(*SVOp);
2642
2643 // Check for non-undef masks pointing at an undef vector and make the masks
2644 // undef as well. This makes it easier to match the shuffle based solely on
2645 // the mask.
2646 if (V2IsUndef &&
2647 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2648 SmallVector<int, 8> NewMask(OrigMask);
2649 for (int &M : NewMask)
2650 if (M >= NumElements)
2651 M = -1;
2652 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2653 }
2654
2655 // Check for illegal shuffle mask element index values.
2656 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2657 (void)MaskUpperLimit;
2658 assert(llvm::all_of(OrigMask,
2659 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2660 "Out of bounds shuffle index");
2661
2662 // For each vector width, delegate to a specialized lowering routine.
2663 if (VT.is128BitVector())
2664 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2665
2666 if (VT.is256BitVector())
2667 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2668
2669 return SDValue();
2670}
2671
2672SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2673 SelectionDAG &DAG) const {
2674 // Custom lower to ensure the libcall return is passed in an FPR on hard
2675 // float ABIs.
2676 SDLoc DL(Op);
2677 MakeLibCallOptions CallOptions;
2678 SDValue Op0 = Op.getOperand(0);
2679 SDValue Chain = SDValue();
2680 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2681 SDValue Res;
2682 std::tie(Res, Chain) =
2683 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2684 if (Subtarget.is64Bit())
2685 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2686 return DAG.getBitcast(MVT::i32, Res);
2687}
2688
2689SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2690 SelectionDAG &DAG) const {
2691 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2692 // float ABIs.
2693 SDLoc DL(Op);
2694 MakeLibCallOptions CallOptions;
2695 SDValue Op0 = Op.getOperand(0);
2696 SDValue Chain = SDValue();
2697 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2698 DL, MVT::f32, Op0)
2699 : DAG.getBitcast(MVT::f32, Op0);
2700 SDValue Res;
2701 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2702 CallOptions, DL, Chain);
2703 return Res;
2704}
2705
2706SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2707 SelectionDAG &DAG) const {
2708 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2709 SDLoc DL(Op);
2710 MakeLibCallOptions CallOptions;
2711 RTLIB::Libcall LC =
2712 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2713 SDValue Res =
2714 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2715 if (Subtarget.is64Bit())
2716 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2717 return DAG.getBitcast(MVT::i32, Res);
2718}
2719
2720SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2721 SelectionDAG &DAG) const {
2722 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2723 MVT VT = Op.getSimpleValueType();
2724 SDLoc DL(Op);
2725 Op = DAG.getNode(
2726 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2727 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2728 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2729 DL, MVT::f32, Op)
2730 : DAG.getBitcast(MVT::f32, Op);
2731 if (VT != MVT::f32)
2732 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2733 return Res;
2734}
2735
2736// Lower BUILD_VECTOR as broadcast load (if possible).
2737// For example:
2738// %a = load i8, ptr %ptr
2739// %b = build_vector %a, %a, %a, %a
2740// is lowered to :
2741// (VLDREPL_B $a0, 0)
2742 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2743 const SDLoc &DL,
2744 SelectionDAG &DAG) {
2745 MVT VT = BVOp->getSimpleValueType(0);
2746 int NumOps = BVOp->getNumOperands();
2747
2748 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2749 "Unsupported vector type for broadcast.");
2750
2751 SDValue IdentitySrc;
2752 bool IsIdeneity = true;
2753
2754 for (int i = 0; i != NumOps; i++) {
2755 SDValue Op = BVOp->getOperand(i);
2756 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2757 IsIdeneity = false;
2758 break;
2759 }
2760 IdentitySrc = BVOp->getOperand(0);
2761 }
2762
2763 // make sure that this load is valid and only has one user.
2764 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2765 return SDValue();
2766
2767 auto *LN = cast<LoadSDNode>(IdentitySrc);
2768 auto ExtType = LN->getExtensionType();
2769
2770 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2771 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2772 SDVTList Tys =
2773 LN->isIndexed()
2774 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2775 : DAG.getVTList(VT, MVT::Other);
2776 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2777 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2778 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2779 return BCast;
2780 }
2781 return SDValue();
2782}
2783
2784// Sequentially insert elements from Ops into Vector, from low to high indices.
2785// Note: Ops can have fewer elements than Vector.
2786 static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
2787 const LoongArchSubtarget &Subtarget, SDValue &Vector,
2788 EVT ResTy) {
2789 assert(Ops.size() <= ResTy.getVectorNumElements());
2790
2791 SDValue Op0 = Ops[0];
2792 if (!Op0.isUndef())
2793 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2794 for (unsigned i = 1; i < Ops.size(); ++i) {
2795 SDValue Opi = Ops[i];
2796 if (Opi.isUndef())
2797 continue;
2798 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2799 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2800 }
2801}
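// For instance, with Ops = {a, undef, c} and ResTy = v4i32 (hypothetical
// values), the loop above yields (insert_vector_elt (scalar_to_vector a), c, 2);
// the undef entry and the unwritten tail keep whatever Vector held on entry.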
2802
2803// Build a ResTy subvector from Node, taking NumElts elements starting at index
2804// 'first'.
2805 static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
2806 SelectionDAG &DAG, SDLoc DL,
2807 const LoongArchSubtarget &Subtarget,
2808 EVT ResTy, unsigned first) {
2809 unsigned NumElts = ResTy.getVectorNumElements();
2810
2811 assert(first >= 0 &&
2812 first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
2813
2814 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
2815 Node->op_begin() + first + NumElts);
2816 SDValue Vector = DAG.getUNDEF(ResTy);
2817 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
2818 return Vector;
2819}
2820
2821SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2822 SelectionDAG &DAG) const {
2823 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2824 MVT VT = Node->getSimpleValueType(0);
2825 EVT ResTy = Op->getValueType(0);
2826 unsigned NumElts = ResTy.getVectorNumElements();
2827 SDLoc DL(Op);
2828 APInt SplatValue, SplatUndef;
2829 unsigned SplatBitSize;
2830 bool HasAnyUndefs;
2831 bool IsConstant = false;
2832 bool UseSameConstant = true;
2833 SDValue ConstantValue;
2834 bool Is128Vec = ResTy.is128BitVector();
2835 bool Is256Vec = ResTy.is256BitVector();
2836
2837 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2838 (!Subtarget.hasExtLASX() || !Is256Vec))
2839 return SDValue();
2840
2841 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2842 return Result;
2843
2844 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2845 /*MinSplatBits=*/8) &&
2846 SplatBitSize <= 64) {
2847 // We can only cope with 8, 16, 32, or 64-bit elements.
2848 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2849 SplatBitSize != 64)
2850 return SDValue();
2851
2852 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2853 // We can only handle 64-bit elements that are within
2854 // the signed 10-bit range or match vldi patterns on 32-bit targets.
2855 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2856 if (!SplatValue.isSignedIntN(10) &&
2857 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
2858 return SDValue();
2859 if ((Is128Vec && ResTy == MVT::v4i32) ||
2860 (Is256Vec && ResTy == MVT::v8i32))
2861 return Op;
2862 }
2863
2864 EVT ViaVecTy;
2865
2866 switch (SplatBitSize) {
2867 default:
2868 return SDValue();
2869 case 8:
2870 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2871 break;
2872 case 16:
2873 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2874 break;
2875 case 32:
2876 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2877 break;
2878 case 64:
2879 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2880 break;
2881 }
2882
2883 // SelectionDAG::getConstant will promote SplatValue appropriately.
2884 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2885
2886 // Bitcast to the type we originally wanted.
2887 if (ViaVecTy != ResTy)
2888 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2889
2890 return Result;
2891 }
2892
2893 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2894 return Op;
2895
2896 for (unsigned i = 0; i < NumElts; ++i) {
2897 SDValue Opi = Node->getOperand(i);
2898 if (isIntOrFPConstant(Opi)) {
2899 IsConstant = true;
2900 if (!ConstantValue.getNode())
2901 ConstantValue = Opi;
2902 else if (ConstantValue != Opi)
2903 UseSameConstant = false;
2904 }
2905 }
2906
2907 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2908 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2909 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2910 for (unsigned i = 0; i < NumElts; ++i) {
2911 SDValue Opi = Node->getOperand(i);
2912 if (!isIntOrFPConstant(Opi))
2913 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2914 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2915 }
2916 return Result;
2917 }
2918
2919 if (!IsConstant) {
2920 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
2921 // the sub-sequence of the vector and then broadcast the sub-sequence.
2922 //
2923 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
2924 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
2925 // generates worse code in some cases. This could be further optimized
2926 // with more consideration.
2927 SmallVector<SDValue, 16> Sequence;
2928 BitVector UndefElements;
2929 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
2930 UndefElements.count() == 0) {
2931 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
2932 // because the high part can simply be treated as undef.
2933 SDValue Vector = DAG.getUNDEF(ResTy);
2934 EVT FillTy = Is256Vec
2935 ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
2936 : ResTy;
2937 SDValue FillVec =
2938 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
2939
2940 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
2941
2942 unsigned SeqLen = Sequence.size();
2943 unsigned SplatLen = NumElts / SeqLen;
2944 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
2945 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
2946
2947 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
2948 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
2949 if (SplatEltTy == MVT::i128)
2950 SplatTy = MVT::v4i64;
2951
2952 SDValue SplatVec;
2953 SDValue SrcVec = DAG.getBitcast(
2954 SplatTy,
2955 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
2956 if (Is256Vec) {
2957 SplatVec =
2958 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
2959 : LoongArchISD::XVREPLVE0,
2960 DL, SplatTy, SrcVec);
2961 } else {
2962 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
2963 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
2964 }
2965
2966 return DAG.getBitcast(ResTy, SplatVec);
2967 }
2968
2969 // Use INSERT_VECTOR_ELT operations rather than expanding to stores, because
2970 // going through memory operations is much slower.
2971 //
2972 // For 256-bit vectors, normally split into two halves and concatenate.
2973 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
2974 // one non-undef element, skip splitting to avoid a worse result.
2975 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
2976 ResTy == MVT::v4f64) {
2977 unsigned NonUndefCount = 0;
2978 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
2979 if (!Node->getOperand(i).isUndef()) {
2980 ++NonUndefCount;
2981 if (NonUndefCount > 1)
2982 break;
2983 }
2984 }
2985 if (NonUndefCount == 1)
2986 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
2987 }
2988
2989 EVT VecTy =
2990 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
2991 SDValue Vector =
2992 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
2993
2994 if (Is128Vec)
2995 return Vector;
2996
2997 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
2998 VecTy, NumElts / 2);
2999
3000 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3001 }
3002
3003 return SDValue();
3004}
3005
3006SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3007 SelectionDAG &DAG) const {
3008 SDLoc DL(Op);
3009 MVT ResVT = Op.getSimpleValueType();
3010 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3011
3012 unsigned NumOperands = Op.getNumOperands();
3013 unsigned NumFreezeUndef = 0;
3014 unsigned NumZero = 0;
3015 unsigned NumNonZero = 0;
3016 unsigned NonZeros = 0;
3017 SmallSet<SDValue, 4> Undefs;
3018 for (unsigned i = 0; i != NumOperands; ++i) {
3019 SDValue SubVec = Op.getOperand(i);
3020 if (SubVec.isUndef())
3021 continue;
3022 if (ISD::isFreezeUndef(SubVec.getNode())) {
3023 // If the freeze(undef) has multiple uses then we must fold to zero.
3024 if (SubVec.hasOneUse()) {
3025 ++NumFreezeUndef;
3026 } else {
3027 ++NumZero;
3028 Undefs.insert(SubVec);
3029 }
3030 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3031 ++NumZero;
3032 else {
3033 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3034 NonZeros |= 1 << i;
3035 ++NumNonZero;
3036 }
3037 }
3038
3039 // If we have more than 2 non-zeros, build each half separately.
3040 if (NumNonZero > 2) {
3041 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3042 ArrayRef<SDUse> Ops = Op->ops();
3043 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3044 Ops.slice(0, NumOperands / 2));
3045 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3046 Ops.slice(NumOperands / 2));
3047 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3048 }
3049
3050 // Otherwise, build it up through insert_subvectors.
3051 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3052 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3053 : DAG.getUNDEF(ResVT));
3054
3055 // Replace Undef operands with ZeroVector.
3056 for (SDValue U : Undefs)
3057 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3058
3059 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3060 unsigned NumSubElems = SubVT.getVectorNumElements();
3061 for (unsigned i = 0; i != NumOperands; ++i) {
3062 if ((NonZeros & (1 << i)) == 0)
3063 continue;
3064
3065 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3066 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3067 }
3068
3069 return Vec;
3070}
3071
3072SDValue
3073LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3074 SelectionDAG &DAG) const {
3075 MVT EltVT = Op.getSimpleValueType();
3076 SDValue Vec = Op->getOperand(0);
3077 EVT VecTy = Vec->getValueType(0);
3078 SDValue Idx = Op->getOperand(1);
3079 SDLoc DL(Op);
3080 MVT GRLenVT = Subtarget.getGRLenVT();
3081
3082 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3083
3084 if (isa<ConstantSDNode>(Idx))
3085 return Op;
3086
3087 switch (VecTy.getSimpleVT().SimpleTy) {
3088 default:
3089 llvm_unreachable("Unexpected type");
3090 case MVT::v32i8:
3091 case MVT::v16i16:
3092 case MVT::v4i64:
3093 case MVT::v4f64: {
3094 // Extract the high half subvector and place it in the low half of a new
3095 // vector. It doesn't matter what the high half of the new vector is.
3096 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3097 SDValue VecHi =
3098 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3099 SDValue TmpVec =
3100 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3101 VecHi, DAG.getConstant(0, DL, GRLenVT));
3102
3103 // Shuffle the original Vec and TmpVec using MaskVec; the lowest element
3104 // of MaskVec is Idx and the rest do not matter. ResVec[0] will hold the
3105 // desired element.
3106 SDValue IdxCp =
3107 Subtarget.is64Bit()
3108 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3109 : DAG.getBitcast(MVT::f32, Idx);
3110 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3111 SDValue MaskVec =
3112 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3113 SDValue ResVec =
3114 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3115
3116 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3117 DAG.getConstant(0, DL, GRLenVT));
3118 }
3119 case MVT::v8i32:
3120 case MVT::v8f32: {
3121 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3122 SDValue SplatValue =
3123 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3124
3125 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3126 DAG.getConstant(0, DL, GRLenVT));
3127 }
3128 }
3129}
3130
3131SDValue
3132LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3133 SelectionDAG &DAG) const {
3134 MVT VT = Op.getSimpleValueType();
3135 MVT EltVT = VT.getVectorElementType();
3136 unsigned NumElts = VT.getVectorNumElements();
3137 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3138 SDLoc DL(Op);
3139 SDValue Op0 = Op.getOperand(0);
3140 SDValue Op1 = Op.getOperand(1);
3141 SDValue Op2 = Op.getOperand(2);
3142
3143 if (isa<ConstantSDNode>(Op2))
3144 return Op;
3145
3146 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3147 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3148
3149 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3150 return SDValue();
3151
3152 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3153 SmallVector<SDValue, 32> RawIndices;
3154 SDValue SplatIdx;
3155 SDValue Indices;
3156
3157 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3158 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3159 for (unsigned i = 0; i < NumElts; ++i) {
3160 RawIndices.push_back(Op2);
3161 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3162 }
3163 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3164 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3165
3166 RawIndices.clear();
3167 for (unsigned i = 0; i < NumElts; ++i) {
3168 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3169 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3170 }
3171 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3172 Indices = DAG.getBitcast(IdxVTy, Indices);
3173 } else {
3174 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3175
3176 for (unsigned i = 0; i < NumElts; ++i)
3177 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3178 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3179 }
3180
3181 // insert vec, elt, idx
3182 // =>
3183 // select (splatidx == {0,1,2...}) ? splatelt : vec
3184 SDValue SelectCC =
3185 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3186 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3187}
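// Concrete shape of the select above (assuming v4i32 with a non-constant
// index on LA64): the insert becomes
// (vselect (seteq (splat idx), <0, 1, 2, 3>), (splat elt), vec),
// so only the lane whose position equals idx takes the new value.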
3188
3189SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3190 SelectionDAG &DAG) const {
3191 SDLoc DL(Op);
3192 SyncScope::ID FenceSSID =
3193 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3194
3195 // singlethread fences only synchronize with signal handlers on the same
3196 // thread and thus only need to preserve instruction order, not actually
3197 // enforce memory ordering.
3198 if (FenceSSID == SyncScope::SingleThread)
3199 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3200 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3201
3202 return Op;
3203}
3204
3205SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3206 SelectionDAG &DAG) const {
3207
3208 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3209 DAG.getContext()->emitError(
3210 "On LA64, only 64-bit registers can be written.");
3211 return Op.getOperand(0);
3212 }
3213
3214 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3215 DAG.getContext()->emitError(
3216 "On LA32, only 32-bit registers can be written.");
3217 return Op.getOperand(0);
3218 }
3219
3220 return Op;
3221}
3222
3223SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3224 SelectionDAG &DAG) const {
3225 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3226 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3227 "be a constant integer");
3228 return SDValue();
3229 }
3230
3231 MachineFunction &MF = DAG.getMachineFunction();
3232 MF.getFrameInfo().setFrameAddressIsTaken(true);
3233 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3234 EVT VT = Op.getValueType();
3235 SDLoc DL(Op);
3236 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3237 unsigned Depth = Op.getConstantOperandVal(0);
3238 int GRLenInBytes = Subtarget.getGRLen() / 8;
3239
3240 while (Depth--) {
3241 int Offset = -(GRLenInBytes * 2);
3242 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3243 DAG.getSignedConstant(Offset, DL, VT));
3244 FrameAddr =
3245 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3246 }
3247 return FrameAddr;
3248}
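// For example (assuming LA64, so GRLenInBytes = 8), a depth of 1 performs a
// single load from (frame pointer - 16), where the caller's frame pointer is
// assumed to have been saved by the prologue; deeper requests repeat the walk
// once per level.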
3249
3250SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3251 SelectionDAG &DAG) const {
3252 // Currently only support lowering return address for current frame.
3253 if (Op.getConstantOperandVal(0) != 0) {
3254 DAG.getContext()->emitError(
3255 "return address can only be determined for the current frame");
3256 return SDValue();
3257 }
3258
3259 MachineFunction &MF = DAG.getMachineFunction();
3260 MF.getFrameInfo().setReturnAddressIsTaken(true);
3261 MVT GRLenVT = Subtarget.getGRLenVT();
3262
3263 // Return the value of the return address register, marking it an implicit
3264 // live-in.
3265 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3266 getRegClassFor(GRLenVT));
3267 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3268}
3269
3270SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3271 SelectionDAG &DAG) const {
3272 MachineFunction &MF = DAG.getMachineFunction();
3273 auto Size = Subtarget.getGRLen() / 8;
3274 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3275 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3276}
3277
3278SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3279 SelectionDAG &DAG) const {
3280 MachineFunction &MF = DAG.getMachineFunction();
3281 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3282
3283 SDLoc DL(Op);
3284 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3286
3287 // vastart just stores the address of the VarArgsFrameIndex slot into the
3288 // memory location argument.
3289 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3290 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3291 MachinePointerInfo(SV));
3292}
3293
3294SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3295 SelectionDAG &DAG) const {
3296 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3297 !Subtarget.hasBasicD() && "unexpected target features");
3298
3299 SDLoc DL(Op);
3300 SDValue Op0 = Op.getOperand(0);
3301 if (Op0->getOpcode() == ISD::AND) {
3302 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3303 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3304 return Op;
3305 }
3306
3307 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3308 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3309 Op0.getConstantOperandVal(2) == UINT64_C(0))
3310 return Op;
3311
3312 if (Op0.getOpcode() == ISD::AssertZext &&
3313 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3314 return Op;
3315
3316 EVT OpVT = Op0.getValueType();
3317 EVT RetVT = Op.getValueType();
3318 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3319 MakeLibCallOptions CallOptions;
3320 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3321 SDValue Chain = SDValue();
3322 SDValue Result;
3323 std::tie(Result, Chain) =
3324 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3325 return Result;
3326}
3327
3328SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3329 SelectionDAG &DAG) const {
3330 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3331 !Subtarget.hasBasicD() && "unexpected target features");
3332
3333 SDLoc DL(Op);
3334 SDValue Op0 = Op.getOperand(0);
3335
3336 if ((Op0.getOpcode() == ISD::AssertSext ||
3337 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
3338 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3339 return Op;
3340
3341 EVT OpVT = Op0.getValueType();
3342 EVT RetVT = Op.getValueType();
3343 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3344 MakeLibCallOptions CallOptions;
3345 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3346 SDValue Chain = SDValue();
3347 SDValue Result;
3348 std::tie(Result, Chain) =
3349 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3350 return Result;
3351}
3352
3353SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3354 SelectionDAG &DAG) const {
3355
3356 SDLoc DL(Op);
3357 EVT VT = Op.getValueType();
3358 SDValue Op0 = Op.getOperand(0);
3359 EVT Op0VT = Op0.getValueType();
3360
3361 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3362 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3363 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3364 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3365 }
3366 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3367 SDValue Lo, Hi;
3368 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3369 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3370 }
3371 return Op;
3372}
3373
3374SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3375 SelectionDAG &DAG) const {
3376
3377 SDLoc DL(Op);
3378 SDValue Op0 = Op.getOperand(0);
3379
3380 if (Op0.getValueType() == MVT::f16)
3381 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3382
3383 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3384 !Subtarget.hasBasicD()) {
3385 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3386 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3387 }
3388
3389 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3390 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3391 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3392}
3393
3394 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3395 SelectionDAG &DAG, unsigned Flags) {
3396 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3397}
3398
3399 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3400 SelectionDAG &DAG, unsigned Flags) {
3401 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3402 Flags);
3403}
3404
3405 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3406 SelectionDAG &DAG, unsigned Flags) {
3407 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3408 N->getOffset(), Flags);
3409}
3410
3411 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3412 SelectionDAG &DAG, unsigned Flags) {
3413 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3414}
3415
3416template <class NodeTy>
3417SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3418 CodeModel::Model M,
3419 bool IsLocal) const {
3420 SDLoc DL(N);
3421 EVT Ty = getPointerTy(DAG.getDataLayout());
3422 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3423 SDValue Load;
3424
3425 switch (M) {
3426 default:
3427 report_fatal_error("Unsupported code model");
3428
3429 case CodeModel::Large: {
3430 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3431
3432 // This is not actually used, but is necessary for successfully matching
3433 // the PseudoLA_*_LARGE nodes.
3434 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3435 if (IsLocal) {
3436 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3437 // eventually becomes the desired 5-insn code sequence.
3438 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3439 Tmp, Addr),
3440 0);
3441 } else {
3442 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3443 // eventually becomes the desired 5-insn code sequence.
3444 Load = SDValue(
3445 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3446 0);
3447 }
3448 break;
3449 }
3450
3451 case CodeModel::Small:
3452 case CodeModel::Medium:
3453 if (IsLocal) {
3454 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3455 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3456 Load = SDValue(
3457 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3458 } else {
3459 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3460 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3461 Load =
3462 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3463 }
3464 }
3465
3466 if (!IsLocal) {
3467 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3468 MachineFunction &MF = DAG.getMachineFunction();
3469 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3470 MachinePointerInfo::getGOT(MF),
3471 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3472 MachineMemOperand::MOInvariant,
3473 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3474 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3475 }
3476
3477 return Load;
3478}
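// Illustrative sketch, not part of the upstream source: under the small and
// medium code models the pseudos used above expand roughly as described in
// the comments, e.g. on LA64:
//   PseudoLA_PCREL sym -> pcalau12i $rd, %pc_hi20(sym)
//                         addi.d $rd, $rd, %pc_lo12(sym)
//   PseudoLA_GOT sym   -> pcalau12i $rd, %got_pc_hi20(sym)
//                         ld.d $rd, $rd, %got_pc_lo12(sym)
// The *_LARGE variants become the longer 5-instruction sequences mentioned in
// the CodeModel::Large case.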
3479
3480SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3481 SelectionDAG &DAG) const {
3482 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3483 DAG.getTarget().getCodeModel());
3484}
3485
3486SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3487 SelectionDAG &DAG) const {
3488 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3489 DAG.getTarget().getCodeModel());
3490}
3491
3492SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3493 SelectionDAG &DAG) const {
3494 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3495 DAG.getTarget().getCodeModel());
3496}
3497
3498SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3499 SelectionDAG &DAG) const {
3500 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3501 assert(N->getOffset() == 0 && "unexpected offset in global node");
3502 auto CM = DAG.getTarget().getCodeModel();
3503 const GlobalValue *GV = N->getGlobal();
3504
3505 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3506 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3507 CM = *GCM;
3508 }
3509
3510 return getAddr(N, DAG, CM, GV->isDSOLocal());
3511}
3512
3513SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3514 SelectionDAG &DAG,
3515 unsigned Opc, bool UseGOT,
3516 bool Large) const {
3517 SDLoc DL(N);
3518 EVT Ty = getPointerTy(DAG.getDataLayout());
3519 MVT GRLenVT = Subtarget.getGRLenVT();
3520
3521 // This is not actually used, but is necessary for successfully matching the
3522 // PseudoLA_*_LARGE nodes.
3523 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3524 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3525
3526 // Only IE needs an extra argument for large code model.
3527 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3528 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3529 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3530
3531 // If it is LE for normal/medium code model, the add tp operation will occur
3532 // during the pseudo-instruction expansion.
3533 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3534 return Offset;
3535
3536 if (UseGOT) {
3537 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3538 MachineFunction &MF = DAG.getMachineFunction();
3539 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3540 MachinePointerInfo::getGOT(MF),
3541 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3542 MachineMemOperand::MOInvariant,
3543 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3544 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3545 }
3546
3547 // Add the thread pointer.
3548 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3549 DAG.getRegister(LoongArch::R2, GRLenVT));
3550}
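// Illustrative note, not part of the upstream source: for initial-exec the
// pseudo above loads the TLS offset from the GOT (hence the invariant memory
// operand attached when UseGOT is set) and the offset is then added to the
// thread pointer in $r2; for local-exec under the small/medium code model the
// add of the thread pointer is instead folded into the PseudoLA_TLS_LE
// expansion, so the function returns early without emitting an explicit ADD.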
3551
3552SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3553 SelectionDAG &DAG,
3554 unsigned Opc,
3555 bool Large) const {
3556 SDLoc DL(N);
3557 EVT Ty = getPointerTy(DAG.getDataLayout());
3558 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3559
3560 // This is not actually used, but is necessary for successfully matching the
3561 // PseudoLA_*_LARGE nodes.
3562 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3563
3564 // Use a PC-relative addressing mode to access the dynamic GOT address.
3565 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3566 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3567 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3568
3569 // Prepare argument list to generate call.
3570 ArgListTy Args;
3571 Args.emplace_back(Load, CallTy);
3572
3573 // Setup call to __tls_get_addr.
3574 TargetLowering::CallLoweringInfo CLI(DAG);
3575 CLI.setDebugLoc(DL)
3576 .setChain(DAG.getEntryNode())
3577 .setLibCallee(CallingConv::C, CallTy,
3578 DAG.getExternalSymbol("__tls_get_addr", Ty),
3579 std::move(Args));
3580
3581 return LowerCallTo(CLI).first;
3582}
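// Illustrative sketch, not part of the upstream source: for a general- or
// local-dynamic access this builds roughly
//   (call __tls_get_addr, (PseudoLA_TLS_GD/LD sym))
// i.e. the address computed by the pseudo is passed as the single
// pointer-sized integer argument of the __tls_get_addr libcall, and the
// call's return value is the address of the TLS object.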
3583
3584SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3585 SelectionDAG &DAG, unsigned Opc,
3586 bool Large) const {
3587 SDLoc DL(N);
3588 EVT Ty = getPointerTy(DAG.getDataLayout());
3589 const GlobalValue *GV = N->getGlobal();
3590
3591 // This is not actually used, but is necessary for successfully matching the
3592 // PseudoLA_*_LARGE nodes.
3593 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3594
3595 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3596 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3597 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3598 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3599 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3600}
3601
3602SDValue
3603LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3604 SelectionDAG &DAG) const {
3605 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3606 CallingConv::GHC)
3607 report_fatal_error("In GHC calling convention TLS is not supported");
3608
3609 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3610 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3611
3612 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3613 assert(N->getOffset() == 0 && "unexpected offset in global node");
3614
3615 if (DAG.getTarget().useEmulatedTLS())
3616 reportFatalUsageError("the emulated TLS is prohibited");
3617
3618 bool IsDesc = DAG.getTarget().useTLSDESC();
3619
3620 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3621 case TLSModel::GeneralDynamic:
3622 // In this model, application code calls the dynamic linker function
3623 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3624 // runtime.
3625 if (!IsDesc)
3626 return getDynamicTLSAddr(N, DAG,
3627 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3628 : LoongArch::PseudoLA_TLS_GD,
3629 Large);
3630 break;
3631 case TLSModel::LocalDynamic:
3632 // Same as GeneralDynamic, except for assembly modifiers and relocation
3633 // records.
3634 if (!IsDesc)
3635 return getDynamicTLSAddr(N, DAG,
3636 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3637 : LoongArch::PseudoLA_TLS_LD,
3638 Large);
3639 break;
3640 case TLSModel::InitialExec:
3641 // This model uses the GOT to resolve TLS offsets.
3642 return getStaticTLSAddr(N, DAG,
3643 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3644 : LoongArch::PseudoLA_TLS_IE,
3645 /*UseGOT=*/true, Large);
3646 case TLSModel::LocalExec:
3647 // This model is used when static linking as the TLS offsets are resolved
3648 // during program linking.
3649 //
3650 // This node doesn't need an extra argument for the large code model.
3651 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3652 /*UseGOT=*/false, Large);
3653 }
3654
3655 return getTLSDescAddr(N, DAG,
3656 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3657 : LoongArch::PseudoLA_TLS_DESC,
3658 Large);
3659}
3660
3661template <unsigned N>
3662static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3663 SelectionDAG &DAG, bool IsSigned = false) {
3664 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3665 // Check the ImmArg.
3666 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3667 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3668 DAG.getContext()->emitError(Op->getOperationName(0) +
3669 ": argument out of range.");
3670 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3671 }
3672 return SDValue();
3673}
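// Illustrative usage, not part of the upstream source: for an intrinsic whose
// immediate operand must fit in N bits, e.g. a hypothetical call
//   @llvm.loongarch.lsx.vrotri.b(<16 x i8> %v, i32 9)  ; expects uimm3
// checkIntrinsicImmArg<3> emits "<intrinsic name>: argument out of range."
// and returns an UNDEF of the result type, while an in-range immediate yields
// an empty SDValue so the normal lowering path continues.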
3674
3675SDValue
3676LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3677 SelectionDAG &DAG) const {
3678 switch (Op.getConstantOperandVal(0)) {
3679 default:
3680 return SDValue(); // Don't custom lower most intrinsics.
3681 case Intrinsic::thread_pointer: {
3682 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3683 return DAG.getRegister(LoongArch::R2, PtrVT);
3684 }
3685 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3686 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3687 case Intrinsic::loongarch_lsx_vreplvei_d:
3688 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3689 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3690 case Intrinsic::loongarch_lsx_vreplvei_w:
3691 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3692 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3693 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3694 case Intrinsic::loongarch_lasx_xvpickve_d:
3695 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3696 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3697 case Intrinsic::loongarch_lasx_xvinsve0_d:
3698 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3699 case Intrinsic::loongarch_lsx_vsat_b:
3700 case Intrinsic::loongarch_lsx_vsat_bu:
3701 case Intrinsic::loongarch_lsx_vrotri_b:
3702 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3703 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3704 case Intrinsic::loongarch_lsx_vsrlri_b:
3705 case Intrinsic::loongarch_lsx_vsrari_b:
3706 case Intrinsic::loongarch_lsx_vreplvei_h:
3707 case Intrinsic::loongarch_lasx_xvsat_b:
3708 case Intrinsic::loongarch_lasx_xvsat_bu:
3709 case Intrinsic::loongarch_lasx_xvrotri_b:
3710 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3711 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3712 case Intrinsic::loongarch_lasx_xvsrlri_b:
3713 case Intrinsic::loongarch_lasx_xvsrari_b:
3714 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3715 case Intrinsic::loongarch_lasx_xvpickve_w:
3716 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3717 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3718 case Intrinsic::loongarch_lasx_xvinsve0_w:
3719 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3720 case Intrinsic::loongarch_lsx_vsat_h:
3721 case Intrinsic::loongarch_lsx_vsat_hu:
3722 case Intrinsic::loongarch_lsx_vrotri_h:
3723 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3724 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3725 case Intrinsic::loongarch_lsx_vsrlri_h:
3726 case Intrinsic::loongarch_lsx_vsrari_h:
3727 case Intrinsic::loongarch_lsx_vreplvei_b:
3728 case Intrinsic::loongarch_lasx_xvsat_h:
3729 case Intrinsic::loongarch_lasx_xvsat_hu:
3730 case Intrinsic::loongarch_lasx_xvrotri_h:
3731 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3732 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3733 case Intrinsic::loongarch_lasx_xvsrlri_h:
3734 case Intrinsic::loongarch_lasx_xvsrari_h:
3735 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3736 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3737 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3738 case Intrinsic::loongarch_lsx_vsrani_b_h:
3739 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3740 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3741 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3742 case Intrinsic::loongarch_lsx_vssrani_b_h:
3743 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3744 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3745 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3746 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3747 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3748 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3749 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3750 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3751 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3752 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3753 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3754 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3755 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3756 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3757 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3758 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3759 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3760 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3761 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3762 case Intrinsic::loongarch_lsx_vsat_w:
3763 case Intrinsic::loongarch_lsx_vsat_wu:
3764 case Intrinsic::loongarch_lsx_vrotri_w:
3765 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3766 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3767 case Intrinsic::loongarch_lsx_vsrlri_w:
3768 case Intrinsic::loongarch_lsx_vsrari_w:
3769 case Intrinsic::loongarch_lsx_vslei_bu:
3770 case Intrinsic::loongarch_lsx_vslei_hu:
3771 case Intrinsic::loongarch_lsx_vslei_wu:
3772 case Intrinsic::loongarch_lsx_vslei_du:
3773 case Intrinsic::loongarch_lsx_vslti_bu:
3774 case Intrinsic::loongarch_lsx_vslti_hu:
3775 case Intrinsic::loongarch_lsx_vslti_wu:
3776 case Intrinsic::loongarch_lsx_vslti_du:
3777 case Intrinsic::loongarch_lsx_vbsll_v:
3778 case Intrinsic::loongarch_lsx_vbsrl_v:
3779 case Intrinsic::loongarch_lasx_xvsat_w:
3780 case Intrinsic::loongarch_lasx_xvsat_wu:
3781 case Intrinsic::loongarch_lasx_xvrotri_w:
3782 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3783 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3784 case Intrinsic::loongarch_lasx_xvsrlri_w:
3785 case Intrinsic::loongarch_lasx_xvsrari_w:
3786 case Intrinsic::loongarch_lasx_xvslei_bu:
3787 case Intrinsic::loongarch_lasx_xvslei_hu:
3788 case Intrinsic::loongarch_lasx_xvslei_wu:
3789 case Intrinsic::loongarch_lasx_xvslei_du:
3790 case Intrinsic::loongarch_lasx_xvslti_bu:
3791 case Intrinsic::loongarch_lasx_xvslti_hu:
3792 case Intrinsic::loongarch_lasx_xvslti_wu:
3793 case Intrinsic::loongarch_lasx_xvslti_du:
3794 case Intrinsic::loongarch_lasx_xvbsll_v:
3795 case Intrinsic::loongarch_lasx_xvbsrl_v:
3796 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3797 case Intrinsic::loongarch_lsx_vseqi_b:
3798 case Intrinsic::loongarch_lsx_vseqi_h:
3799 case Intrinsic::loongarch_lsx_vseqi_w:
3800 case Intrinsic::loongarch_lsx_vseqi_d:
3801 case Intrinsic::loongarch_lsx_vslei_b:
3802 case Intrinsic::loongarch_lsx_vslei_h:
3803 case Intrinsic::loongarch_lsx_vslei_w:
3804 case Intrinsic::loongarch_lsx_vslei_d:
3805 case Intrinsic::loongarch_lsx_vslti_b:
3806 case Intrinsic::loongarch_lsx_vslti_h:
3807 case Intrinsic::loongarch_lsx_vslti_w:
3808 case Intrinsic::loongarch_lsx_vslti_d:
3809 case Intrinsic::loongarch_lasx_xvseqi_b:
3810 case Intrinsic::loongarch_lasx_xvseqi_h:
3811 case Intrinsic::loongarch_lasx_xvseqi_w:
3812 case Intrinsic::loongarch_lasx_xvseqi_d:
3813 case Intrinsic::loongarch_lasx_xvslei_b:
3814 case Intrinsic::loongarch_lasx_xvslei_h:
3815 case Intrinsic::loongarch_lasx_xvslei_w:
3816 case Intrinsic::loongarch_lasx_xvslei_d:
3817 case Intrinsic::loongarch_lasx_xvslti_b:
3818 case Intrinsic::loongarch_lasx_xvslti_h:
3819 case Intrinsic::loongarch_lasx_xvslti_w:
3820 case Intrinsic::loongarch_lasx_xvslti_d:
3821 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3822 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3823 case Intrinsic::loongarch_lsx_vsrani_h_w:
3824 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3825 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3826 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3827 case Intrinsic::loongarch_lsx_vssrani_h_w:
3828 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3829 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3830 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3831 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3832 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3833 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3834 case Intrinsic::loongarch_lsx_vfrstpi_b:
3835 case Intrinsic::loongarch_lsx_vfrstpi_h:
3836 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3837 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3838 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3839 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3840 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3841 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3842 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3843 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3844 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3845 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3846 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3847 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3848 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3849 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3850 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3851 case Intrinsic::loongarch_lsx_vsat_d:
3852 case Intrinsic::loongarch_lsx_vsat_du:
3853 case Intrinsic::loongarch_lsx_vrotri_d:
3854 case Intrinsic::loongarch_lsx_vsrlri_d:
3855 case Intrinsic::loongarch_lsx_vsrari_d:
3856 case Intrinsic::loongarch_lasx_xvsat_d:
3857 case Intrinsic::loongarch_lasx_xvsat_du:
3858 case Intrinsic::loongarch_lasx_xvrotri_d:
3859 case Intrinsic::loongarch_lasx_xvsrlri_d:
3860 case Intrinsic::loongarch_lasx_xvsrari_d:
3861 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3862 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3863 case Intrinsic::loongarch_lsx_vsrani_w_d:
3864 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3865 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3866 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3867 case Intrinsic::loongarch_lsx_vssrani_w_d:
3868 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3869 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3870 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3871 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3872 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3873 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3874 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3875 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3876 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3877 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3878 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3879 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3880 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3881 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3882 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3883 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3884 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3885 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3886 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3887 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3888 case Intrinsic::loongarch_lsx_vsrani_d_q:
3889 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3890 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3891 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3892 case Intrinsic::loongarch_lsx_vssrani_d_q:
3893 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3894 case Intrinsic::loongarch_lsx_vssrani_du_q:
3895 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3896 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3897 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3898 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3899 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3900 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3901 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3902 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3903 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3904 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3905 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3906 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3907 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3908 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3909 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3910 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3911 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3912 case Intrinsic::loongarch_lsx_vnori_b:
3913 case Intrinsic::loongarch_lsx_vshuf4i_b:
3914 case Intrinsic::loongarch_lsx_vshuf4i_h:
3915 case Intrinsic::loongarch_lsx_vshuf4i_w:
3916 case Intrinsic::loongarch_lasx_xvnori_b:
3917 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3918 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3919 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3920 case Intrinsic::loongarch_lasx_xvpermi_d:
3921 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3922 case Intrinsic::loongarch_lsx_vshuf4i_d:
3923 case Intrinsic::loongarch_lsx_vpermi_w:
3924 case Intrinsic::loongarch_lsx_vbitseli_b:
3925 case Intrinsic::loongarch_lsx_vextrins_b:
3926 case Intrinsic::loongarch_lsx_vextrins_h:
3927 case Intrinsic::loongarch_lsx_vextrins_w:
3928 case Intrinsic::loongarch_lsx_vextrins_d:
3929 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3930 case Intrinsic::loongarch_lasx_xvpermi_w:
3931 case Intrinsic::loongarch_lasx_xvpermi_q:
3932 case Intrinsic::loongarch_lasx_xvbitseli_b:
3933 case Intrinsic::loongarch_lasx_xvextrins_b:
3934 case Intrinsic::loongarch_lasx_xvextrins_h:
3935 case Intrinsic::loongarch_lasx_xvextrins_w:
3936 case Intrinsic::loongarch_lasx_xvextrins_d:
3937 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3938 case Intrinsic::loongarch_lsx_vrepli_b:
3939 case Intrinsic::loongarch_lsx_vrepli_h:
3940 case Intrinsic::loongarch_lsx_vrepli_w:
3941 case Intrinsic::loongarch_lsx_vrepli_d:
3942 case Intrinsic::loongarch_lasx_xvrepli_b:
3943 case Intrinsic::loongarch_lasx_xvrepli_h:
3944 case Intrinsic::loongarch_lasx_xvrepli_w:
3945 case Intrinsic::loongarch_lasx_xvrepli_d:
3946 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3947 case Intrinsic::loongarch_lsx_vldi:
3948 case Intrinsic::loongarch_lasx_xvldi:
3949 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3950 }
3951}
3952
3953 // Helper function that emits an error message for intrinsics with a chain and
3954 // returns the merge values of an UNDEF and the chain.
3955static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3956 StringRef ErrorMsg,
3957 SelectionDAG &DAG) {
3958 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3959 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3960 SDLoc(Op));
3961}
3962
3963SDValue
3964LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3965 SelectionDAG &DAG) const {
3966 SDLoc DL(Op);
3967 MVT GRLenVT = Subtarget.getGRLenVT();
3968 EVT VT = Op.getValueType();
3969 SDValue Chain = Op.getOperand(0);
3970 const StringRef ErrorMsgOOR = "argument out of range";
3971 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3972 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3973
3974 switch (Op.getConstantOperandVal(1)) {
3975 default:
3976 return Op;
3977 case Intrinsic::loongarch_crc_w_b_w:
3978 case Intrinsic::loongarch_crc_w_h_w:
3979 case Intrinsic::loongarch_crc_w_w_w:
3980 case Intrinsic::loongarch_crc_w_d_w:
3981 case Intrinsic::loongarch_crcc_w_b_w:
3982 case Intrinsic::loongarch_crcc_w_h_w:
3983 case Intrinsic::loongarch_crcc_w_w_w:
3984 case Intrinsic::loongarch_crcc_w_d_w:
3985 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3986 case Intrinsic::loongarch_csrrd_w:
3987 case Intrinsic::loongarch_csrrd_d: {
3988 unsigned Imm = Op.getConstantOperandVal(2);
3989 return !isUInt<14>(Imm)
3990 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3991 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3992 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3993 }
3994 case Intrinsic::loongarch_csrwr_w:
3995 case Intrinsic::loongarch_csrwr_d: {
3996 unsigned Imm = Op.getConstantOperandVal(3);
3997 return !isUInt<14>(Imm)
3998 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3999 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4000 {Chain, Op.getOperand(2),
4001 DAG.getConstant(Imm, DL, GRLenVT)});
4002 }
4003 case Intrinsic::loongarch_csrxchg_w:
4004 case Intrinsic::loongarch_csrxchg_d: {
4005 unsigned Imm = Op.getConstantOperandVal(4);
4006 return !isUInt<14>(Imm)
4007 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4008 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4009 {Chain, Op.getOperand(2), Op.getOperand(3),
4010 DAG.getConstant(Imm, DL, GRLenVT)});
4011 }
4012 case Intrinsic::loongarch_iocsrrd_d: {
4013 return DAG.getNode(
4014 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4015 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4016 }
4017#define IOCSRRD_CASE(NAME, NODE) \
4018 case Intrinsic::loongarch_##NAME: { \
4019 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4020 {Chain, Op.getOperand(2)}); \
4021 }
4022 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4023 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4024 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4025#undef IOCSRRD_CASE
4026 case Intrinsic::loongarch_cpucfg: {
4027 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4028 {Chain, Op.getOperand(2)});
4029 }
4030 case Intrinsic::loongarch_lddir_d: {
4031 unsigned Imm = Op.getConstantOperandVal(3);
4032 return !isUInt<8>(Imm)
4033 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4034 : Op;
4035 }
4036 case Intrinsic::loongarch_movfcsr2gr: {
4037 if (!Subtarget.hasBasicF())
4038 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4039 unsigned Imm = Op.getConstantOperandVal(2);
4040 return !isUInt<2>(Imm)
4041 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4042 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4043 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4044 }
4045 case Intrinsic::loongarch_lsx_vld:
4046 case Intrinsic::loongarch_lsx_vldrepl_b:
4047 case Intrinsic::loongarch_lasx_xvld:
4048 case Intrinsic::loongarch_lasx_xvldrepl_b:
4049 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4050 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4051 : SDValue();
4052 case Intrinsic::loongarch_lsx_vldrepl_h:
4053 case Intrinsic::loongarch_lasx_xvldrepl_h:
4054 return !isShiftedInt<11, 1>(
4055 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4056 ? emitIntrinsicWithChainErrorMessage(
4057 Op, "argument out of range or not a multiple of 2", DAG)
4058 : SDValue();
4059 case Intrinsic::loongarch_lsx_vldrepl_w:
4060 case Intrinsic::loongarch_lasx_xvldrepl_w:
4061 return !isShiftedInt<10, 2>(
4062 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4063 ? emitIntrinsicWithChainErrorMessage(
4064 Op, "argument out of range or not a multiple of 4", DAG)
4065 : SDValue();
4066 case Intrinsic::loongarch_lsx_vldrepl_d:
4067 case Intrinsic::loongarch_lasx_xvldrepl_d:
4068 return !isShiftedInt<9, 3>(
4069 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4070 ? emitIntrinsicWithChainErrorMessage(
4071 Op, "argument out of range or not a multiple of 8", DAG)
4072 : SDValue();
4073 }
4074}
4075
4076 // Helper function that emits an error message for intrinsics with a void
4077 // return value and returns the chain.
4078static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4079 SelectionDAG &DAG) {
4080
4081 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4082 return Op.getOperand(0);
4083}
4084
4085SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4086 SelectionDAG &DAG) const {
4087 SDLoc DL(Op);
4088 MVT GRLenVT = Subtarget.getGRLenVT();
4089 SDValue Chain = Op.getOperand(0);
4090 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4091 SDValue Op2 = Op.getOperand(2);
4092 const StringRef ErrorMsgOOR = "argument out of range";
4093 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4094 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4095 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4096
4097 switch (IntrinsicEnum) {
4098 default:
4099 // TODO: Add more Intrinsics.
4100 return SDValue();
4101 case Intrinsic::loongarch_cacop_d:
4102 case Intrinsic::loongarch_cacop_w: {
4103 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4104 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4105 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4106 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4107 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4108 unsigned Imm1 = Op2->getAsZExtVal();
4109 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4110 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4111 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4112 return Op;
4113 }
4114 case Intrinsic::loongarch_dbar: {
4115 unsigned Imm = Op2->getAsZExtVal();
4116 return !isUInt<15>(Imm)
4117 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4118 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4119 DAG.getConstant(Imm, DL, GRLenVT));
4120 }
4121 case Intrinsic::loongarch_ibar: {
4122 unsigned Imm = Op2->getAsZExtVal();
4123 return !isUInt<15>(Imm)
4124 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4125 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4126 DAG.getConstant(Imm, DL, GRLenVT));
4127 }
4128 case Intrinsic::loongarch_break: {
4129 unsigned Imm = Op2->getAsZExtVal();
4130 return !isUInt<15>(Imm)
4131 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4132 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4133 DAG.getConstant(Imm, DL, GRLenVT));
4134 }
4135 case Intrinsic::loongarch_movgr2fcsr: {
4136 if (!Subtarget.hasBasicF())
4137 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4138 unsigned Imm = Op2->getAsZExtVal();
4139 return !isUInt<2>(Imm)
4140 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4141 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4142 DAG.getConstant(Imm, DL, GRLenVT),
4143 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4144 Op.getOperand(3)));
4145 }
4146 case Intrinsic::loongarch_syscall: {
4147 unsigned Imm = Op2->getAsZExtVal();
4148 return !isUInt<15>(Imm)
4149 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4150 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4151 DAG.getConstant(Imm, DL, GRLenVT));
4152 }
4153#define IOCSRWR_CASE(NAME, NODE) \
4154 case Intrinsic::loongarch_##NAME: { \
4155 SDValue Op3 = Op.getOperand(3); \
4156 return Subtarget.is64Bit() \
4157 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4158 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4159 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4160 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4161 Op3); \
4162 }
4163 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4164 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4165 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4166#undef IOCSRWR_CASE
4167 case Intrinsic::loongarch_iocsrwr_d: {
4168 return !Subtarget.is64Bit()
4169 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4170 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4171 Op2,
4172 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4173 Op.getOperand(3)));
4174 }
4175#define ASRT_LE_GT_CASE(NAME) \
4176 case Intrinsic::loongarch_##NAME: { \
4177 return !Subtarget.is64Bit() \
4178 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4179 : Op; \
4180 }
4181 ASRT_LE_GT_CASE(asrtle_d)
4182 ASRT_LE_GT_CASE(asrtgt_d)
4183#undef ASRT_LE_GT_CASE
4184 case Intrinsic::loongarch_ldpte_d: {
4185 unsigned Imm = Op.getConstantOperandVal(3);
4186 return !Subtarget.is64Bit()
4187 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4188 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4189 : Op;
4190 }
4191 case Intrinsic::loongarch_lsx_vst:
4192 case Intrinsic::loongarch_lasx_xvst:
4193 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4194 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4195 : SDValue();
4196 case Intrinsic::loongarch_lasx_xvstelm_b:
4197 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4198 !isUInt<5>(Op.getConstantOperandVal(5)))
4199 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4200 : SDValue();
4201 case Intrinsic::loongarch_lsx_vstelm_b:
4202 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4203 !isUInt<4>(Op.getConstantOperandVal(5)))
4204 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4205 : SDValue();
4206 case Intrinsic::loongarch_lasx_xvstelm_h:
4207 return (!isShiftedInt<8, 1>(
4208 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4209 !isUInt<4>(Op.getConstantOperandVal(5)))
4210 ? emitIntrinsicErrorMessage(
4211 Op, "argument out of range or not a multiple of 2", DAG)
4212 : SDValue();
4213 case Intrinsic::loongarch_lsx_vstelm_h:
4214 return (!isShiftedInt<8, 1>(
4215 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4216 !isUInt<3>(Op.getConstantOperandVal(5)))
4217 ? emitIntrinsicErrorMessage(
4218 Op, "argument out of range or not a multiple of 2", DAG)
4219 : SDValue();
4220 case Intrinsic::loongarch_lasx_xvstelm_w:
4221 return (!isShiftedInt<8, 2>(
4222 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4223 !isUInt<3>(Op.getConstantOperandVal(5)))
4224 ? emitIntrinsicErrorMessage(
4225 Op, "argument out of range or not a multiple of 4", DAG)
4226 : SDValue();
4227 case Intrinsic::loongarch_lsx_vstelm_w:
4228 return (!isShiftedInt<8, 2>(
4229 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4230 !isUInt<2>(Op.getConstantOperandVal(5)))
4231 ? emitIntrinsicErrorMessage(
4232 Op, "argument out of range or not a multiple of 4", DAG)
4233 : SDValue();
4234 case Intrinsic::loongarch_lasx_xvstelm_d:
4235 return (!isShiftedInt<8, 3>(
4236 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4237 !isUInt<2>(Op.getConstantOperandVal(5)))
4238 ? emitIntrinsicErrorMessage(
4239 Op, "argument out of range or not a multiple of 8", DAG)
4240 : SDValue();
4241 case Intrinsic::loongarch_lsx_vstelm_d:
4242 return (!isShiftedInt<8, 3>(
4243 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4244 !isUInt<1>(Op.getConstantOperandVal(5)))
4245 ? emitIntrinsicErrorMessage(
4246 Op, "argument out of range or not a multiple of 8", DAG)
4247 : SDValue();
4248 }
4249}
4250
4251SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4252 SelectionDAG &DAG) const {
4253 SDLoc DL(Op);
4254 SDValue Lo = Op.getOperand(0);
4255 SDValue Hi = Op.getOperand(1);
4256 SDValue Shamt = Op.getOperand(2);
4257 EVT VT = Lo.getValueType();
4258
4259 // if Shamt-GRLen < 0: // Shamt < GRLen
4260 // Lo = Lo << Shamt
4261 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4262 // else:
4263 // Lo = 0
4264 // Hi = Lo << (Shamt-GRLen)
4265
4266 SDValue Zero = DAG.getConstant(0, DL, VT);
4267 SDValue One = DAG.getConstant(1, DL, VT);
4268 SDValue MinusGRLen =
4269 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4270 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4271 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4272 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4273
4274 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4275 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4276 SDValue ShiftRightLo =
4277 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4278 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4279 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4280 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4281
4282 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4283
4284 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4285 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4286
4287 SDValue Parts[2] = {Lo, Hi};
4288 return DAG.getMergeValues(Parts, DL);
4289}
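// Worked example, illustrative and not part of the upstream source: on LA32
// (GRLen = 32), shifting an i64 pair left by Shamt = 40 takes the "else" arm
// of the expansion above (Shamt - GRLen = 8 >= 0), giving Lo = 0 and
// Hi = Lo_in << 8, whereas Shamt = 8 takes the "if" arm, giving
// Lo = Lo_in << 8 and Hi = (Hi_in << 8) | (Lo_in >>u 24).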
4290
4291SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4292 SelectionDAG &DAG,
4293 bool IsSRA) const {
4294 SDLoc DL(Op);
4295 SDValue Lo = Op.getOperand(0);
4296 SDValue Hi = Op.getOperand(1);
4297 SDValue Shamt = Op.getOperand(2);
4298 EVT VT = Lo.getValueType();
4299
4300 // SRA expansion:
4301 // if Shamt-GRLen < 0: // Shamt < GRLen
4302 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4303 // Hi = Hi >>s Shamt
4304 // else:
4305 // Lo = Hi >>s (Shamt-GRLen);
4306 // Hi = Hi >>s (GRLen-1)
4307 //
4308 // SRL expansion:
4309 // if Shamt-GRLen < 0: // Shamt < GRLen
4310 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4311 // Hi = Hi >>u Shamt
4312 // else:
4313 // Lo = Hi >>u (Shamt-GRLen);
4314 // Hi = 0;
4315
4316 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4317
4318 SDValue Zero = DAG.getConstant(0, DL, VT);
4319 SDValue One = DAG.getConstant(1, DL, VT);
4320 SDValue MinusGRLen =
4321 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4322 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4323 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4324 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4325
4326 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4327 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4328 SDValue ShiftLeftHi =
4329 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4330 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4331 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4332 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4333 SDValue HiFalse =
4334 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4335
4336 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4337
4338 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4339 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4340
4341 SDValue Parts[2] = {Lo, Hi};
4342 return DAG.getMergeValues(Parts, DL);
4343}
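// Worked example, illustrative and not part of the upstream source: on LA32 a
// 64-bit logical shift right by Shamt = 8 takes the "if" arm, giving
// Lo = (Lo_in >>u 8) | (Hi_in << 24) and Hi = Hi_in >>u 8, while Shamt = 40
// takes the "else" arm, giving Lo = Hi_in >>u 8 and Hi = 0 (or
// Hi = Hi_in >>s 31 for the arithmetic variant).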
4344
4345// Returns the opcode of the target-specific SDNode that implements the 32-bit
4346// form of the given Opcode.
4347static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4348 switch (Opcode) {
4349 default:
4350 llvm_unreachable("Unexpected opcode");
4351 case ISD::SDIV:
4352 return LoongArchISD::DIV_W;
4353 case ISD::UDIV:
4354 return LoongArchISD::DIV_WU;
4355 case ISD::SREM:
4356 return LoongArchISD::MOD_W;
4357 case ISD::UREM:
4358 return LoongArchISD::MOD_WU;
4359 case ISD::SHL:
4360 return LoongArchISD::SLL_W;
4361 case ISD::SRA:
4362 return LoongArchISD::SRA_W;
4363 case ISD::SRL:
4364 return LoongArchISD::SRL_W;
4365 case ISD::ROTL:
4366 case ISD::ROTR:
4367 return LoongArchISD::ROTR_W;
4368 case ISD::CTTZ:
4369 return LoongArchISD::CTZ_W;
4370 case ISD::CTLZ:
4371 return LoongArchISD::CLZ_W;
4372 }
4373}
4374
4375// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4376// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4377// otherwise be promoted to i64, making it difficult to select the
4378 // SLL_W/.../*W later on, because the fact that the operation was originally
4379 // of type i8/i16/i32 is lost.
4380static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4381 unsigned ExtOpc = ISD::ANY_EXTEND) {
4382 SDLoc DL(N);
4383 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4384 SDValue NewOp0, NewRes;
4385
4386 switch (NumOp) {
4387 default:
4388 llvm_unreachable("Unexpected NumOp");
4389 case 1: {
4390 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4391 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4392 break;
4393 }
4394 case 2: {
4395 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4396 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4397 if (N->getOpcode() == ISD::ROTL) {
4398 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4399 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4400 }
4401 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4402 break;
4403 }
4404 // TODO: Handle more NumOp.
4405 }
4406
4407 // ReplaceNodeResults requires we maintain the same type for the return
4408 // value.
4409 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4410}
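// Illustrative sketch, not part of the upstream source: legalizing an i32
// shift on LA64 through this helper produces
//   (i32 (trunc (SLL_W (any_ext i64:$a), (any_ext i64:$b))))
// so instruction selection can still pick the 32-bit sll.w rather than the
// promoted 64-bit form.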
4411
4412 // Converts the given 32-bit operation to an i64 operation with sign-extension
4413 // semantics, in order to reduce the number of sign-extension instructions.
4414static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4415 SDLoc DL(N);
4416 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4417 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4418 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4419 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4420 DAG.getValueType(MVT::i32));
4421 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4422}
4423
4424 // Helper function that emits an error message for intrinsics with or without a
4425 // chain, and returns an UNDEF plus (when present) the chain as the results.
4426static void emitErrorAndReplaceIntrinsicResults(
4427 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4428 StringRef ErrorMsg, bool WithChain = true) {
4429 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4430 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4431 if (!WithChain)
4432 return;
4433 Results.push_back(N->getOperand(0));
4434}
4435
4436template <unsigned N>
4437static void
4439 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4440 unsigned ResOp) {
4441 const StringRef ErrorMsgOOR = "argument out of range";
4442 unsigned Imm = Node->getConstantOperandVal(2);
4443 if (!isUInt<N>(Imm)) {
4444 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4445 /*WithChain=*/false);
4446 return;
4447 }
4448 SDLoc DL(Node);
4449 SDValue Vec = Node->getOperand(1);
4450
4451 SDValue PickElt =
4452 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4453 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4454 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4455 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4456 PickElt.getValue(0)));
4457}
4458
4459static void replaceVecCondBranchResults(SDNode *N,
4460 SmallVectorImpl<SDValue> &Results,
4461 SelectionDAG &DAG,
4462 const LoongArchSubtarget &Subtarget,
4463 unsigned ResOp) {
4464 SDLoc DL(N);
4465 SDValue Vec = N->getOperand(1);
4466
4467 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4468 Results.push_back(
4469 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4470}
4471
4472static void
4473replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4474 SelectionDAG &DAG,
4475 const LoongArchSubtarget &Subtarget) {
4476 switch (N->getConstantOperandVal(0)) {
4477 default:
4478 llvm_unreachable("Unexpected Intrinsic.");
4479 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4480 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4481 LoongArchISD::VPICK_SEXT_ELT);
4482 break;
4483 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4484 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4485 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4486 LoongArchISD::VPICK_SEXT_ELT);
4487 break;
4488 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4489 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4490 LoongArchISD::VPICK_SEXT_ELT);
4491 break;
4492 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4493 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4494 LoongArchISD::VPICK_ZEXT_ELT);
4495 break;
4496 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4497 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4498 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4499 LoongArchISD::VPICK_ZEXT_ELT);
4500 break;
4501 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4502 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4503 LoongArchISD::VPICK_ZEXT_ELT);
4504 break;
4505 case Intrinsic::loongarch_lsx_bz_b:
4506 case Intrinsic::loongarch_lsx_bz_h:
4507 case Intrinsic::loongarch_lsx_bz_w:
4508 case Intrinsic::loongarch_lsx_bz_d:
4509 case Intrinsic::loongarch_lasx_xbz_b:
4510 case Intrinsic::loongarch_lasx_xbz_h:
4511 case Intrinsic::loongarch_lasx_xbz_w:
4512 case Intrinsic::loongarch_lasx_xbz_d:
4513 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4514 LoongArchISD::VALL_ZERO);
4515 break;
4516 case Intrinsic::loongarch_lsx_bz_v:
4517 case Intrinsic::loongarch_lasx_xbz_v:
4518 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4519 LoongArchISD::VANY_ZERO);
4520 break;
4521 case Intrinsic::loongarch_lsx_bnz_b:
4522 case Intrinsic::loongarch_lsx_bnz_h:
4523 case Intrinsic::loongarch_lsx_bnz_w:
4524 case Intrinsic::loongarch_lsx_bnz_d:
4525 case Intrinsic::loongarch_lasx_xbnz_b:
4526 case Intrinsic::loongarch_lasx_xbnz_h:
4527 case Intrinsic::loongarch_lasx_xbnz_w:
4528 case Intrinsic::loongarch_lasx_xbnz_d:
4529 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4530 LoongArchISD::VALL_NONZERO);
4531 break;
4532 case Intrinsic::loongarch_lsx_bnz_v:
4533 case Intrinsic::loongarch_lasx_xbnz_v:
4534 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4535 LoongArchISD::VANY_NONZERO);
4536 break;
4537 }
4538}
4539
4540static void replaceCMP_XCHG_128Results(SDNode *N,
4541 SmallVectorImpl<SDValue> &Results,
4542 SelectionDAG &DAG) {
4543 assert(N->getValueType(0) == MVT::i128 &&
4544 "AtomicCmpSwap on types less than 128 should be legal");
4545 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4546
4547 unsigned Opcode;
4548 switch (MemOp->getMergedOrdering()) {
4549 case AtomicOrdering::Acquire:
4550 case AtomicOrdering::AcquireRelease:
4551 case AtomicOrdering::SequentiallyConsistent:
4552 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4553 break;
4554 case AtomicOrdering::Monotonic:
4555 case AtomicOrdering::Release:
4556 Opcode = LoongArch::PseudoCmpXchg128;
4557 break;
4558 default:
4559 llvm_unreachable("Unexpected ordering!");
4560 }
4561
4562 SDLoc DL(N);
4563 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4564 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4565 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4566 NewVal.first, NewVal.second, N->getOperand(0)};
4567
4568 SDNode *CmpSwap = DAG.getMachineNode(
4569 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4570 Ops);
4571 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4572 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4573 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4574 Results.push_back(SDValue(CmpSwap, 3));
4575}
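// Illustrative note, not part of the upstream source: an i128 cmpxchg is
// legalized here by splitting the expected and new values into i64 halves,
// feeding them to PseudoCmpXchg128{,Acquire} together with the pointer and
// chain, then rebuilding the i128 result from the first two i64 results with
// ISD::BUILD_PAIR; the machine node's MVT::Other result carries the chain.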
4576
4577void LoongArchTargetLowering::ReplaceNodeResults(
4578 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4579 SDLoc DL(N);
4580 EVT VT = N->getValueType(0);
4581 switch (N->getOpcode()) {
4582 default:
4583 llvm_unreachable("Don't know how to legalize this operation");
4584 case ISD::ADD:
4585 case ISD::SUB:
4586 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4587 "Unexpected custom legalisation");
4588 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4589 break;
4590 case ISD::SDIV:
4591 case ISD::UDIV:
4592 case ISD::SREM:
4593 case ISD::UREM:
4594 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4595 "Unexpected custom legalisation");
4596 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4597 Subtarget.hasDiv32() && VT == MVT::i32
4598 ? ISD::ANY_EXTEND
4599 : ISD::SIGN_EXTEND));
4600 break;
4601 case ISD::SHL:
4602 case ISD::SRA:
4603 case ISD::SRL:
4604 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4605 "Unexpected custom legalisation");
4606 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4607 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4608 break;
4609 }
4610 break;
4611 case ISD::ROTL:
4612 case ISD::ROTR:
4613 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4614 "Unexpected custom legalisation");
4615 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4616 break;
4617 case ISD::FP_TO_SINT: {
4618 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4619 "Unexpected custom legalisation");
4620 SDValue Src = N->getOperand(0);
4621 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4622 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4623 TargetLowering::TypeSoftenFloat) {
4624 if (!isTypeLegal(Src.getValueType()))
4625 return;
4626 if (Src.getValueType() == MVT::f16)
4627 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4628 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4629 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4630 return;
4631 }
4632 // If the FP type needs to be softened, emit a library call using the 'si'
4633 // version. If we left it to default legalization we'd end up with 'di'.
4634 RTLIB::Libcall LC;
4635 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4636 MakeLibCallOptions CallOptions;
4637 EVT OpVT = Src.getValueType();
4638 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4639 SDValue Chain = SDValue();
4640 SDValue Result;
4641 std::tie(Result, Chain) =
4642 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4643 Results.push_back(Result);
4644 break;
4645 }
4646 case ISD::BITCAST: {
4647 SDValue Src = N->getOperand(0);
4648 EVT SrcVT = Src.getValueType();
4649 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4650 Subtarget.hasBasicF()) {
4651 SDValue Dst =
4652 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4653 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4654 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4655 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4656 DAG.getVTList(MVT::i32, MVT::i32), Src);
4657 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4658 NewReg.getValue(0), NewReg.getValue(1));
4659 Results.push_back(RetReg);
4660 }
4661 break;
4662 }
4663 case ISD::FP_TO_UINT: {
4664 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4665 "Unexpected custom legalisation");
4666 auto &TLI = DAG.getTargetLoweringInfo();
4667 SDValue Tmp1, Tmp2;
4668 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4669 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4670 break;
4671 }
4672 case ISD::BSWAP: {
4673 SDValue Src = N->getOperand(0);
4674 assert((VT == MVT::i16 || VT == MVT::i32) &&
4675 "Unexpected custom legalization");
4676 MVT GRLenVT = Subtarget.getGRLenVT();
4677 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4678 SDValue Tmp;
4679 switch (VT.getSizeInBits()) {
4680 default:
4681 llvm_unreachable("Unexpected operand width");
4682 case 16:
4683 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4684 break;
4685 case 32:
4686 // Only LA64 will get here, due to the size mismatch between VT and
4687 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4688 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4689 break;
4690 }
4691 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4692 break;
4693 }
4694 case ISD::BITREVERSE: {
4695 SDValue Src = N->getOperand(0);
4696 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4697 "Unexpected custom legalization");
4698 MVT GRLenVT = Subtarget.getGRLenVT();
4699 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4700 SDValue Tmp;
4701 switch (VT.getSizeInBits()) {
4702 default:
4703 llvm_unreachable("Unexpected operand width");
4704 case 8:
4705 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4706 break;
4707 case 32:
4708 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4709 break;
4710 }
4711 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4712 break;
4713 }
4714 case ISD::CTLZ:
4715 case ISD::CTTZ: {
4716 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4717 "Unexpected custom legalisation");
4718 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4719 break;
4720 }
4721 case ISD::INTRINSIC_W_CHAIN: {
4722 SDValue Chain = N->getOperand(0);
4723 SDValue Op2 = N->getOperand(2);
4724 MVT GRLenVT = Subtarget.getGRLenVT();
4725 const StringRef ErrorMsgOOR = "argument out of range";
4726 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4727 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4728
4729 switch (N->getConstantOperandVal(1)) {
4730 default:
4731 llvm_unreachable("Unexpected Intrinsic.");
4732 case Intrinsic::loongarch_movfcsr2gr: {
4733 if (!Subtarget.hasBasicF()) {
4734 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4735 return;
4736 }
4737 unsigned Imm = Op2->getAsZExtVal();
4738 if (!isUInt<2>(Imm)) {
4739 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4740 return;
4741 }
4742 SDValue MOVFCSR2GRResults = DAG.getNode(
4743 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4744 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4745 Results.push_back(
4746 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4747 Results.push_back(MOVFCSR2GRResults.getValue(1));
4748 break;
4749 }
4750#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4751 case Intrinsic::loongarch_##NAME: { \
4752 SDValue NODE = DAG.getNode( \
4753 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4754 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4755 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4756 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4757 Results.push_back(NODE.getValue(1)); \
4758 break; \
4759 }
4760 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4761 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4762 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4763 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4764 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4765 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4766#undef CRC_CASE_EXT_BINARYOP
4767
4768#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4769 case Intrinsic::loongarch_##NAME: { \
4770 SDValue NODE = DAG.getNode( \
4771 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4772 {Chain, Op2, \
4773 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4774 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4775 Results.push_back(NODE.getValue(1)); \
4776 break; \
4777 }
4778 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4779 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4780#undef CRC_CASE_EXT_UNARYOP
4781#define CSR_CASE(ID) \
4782 case Intrinsic::loongarch_##ID: { \
4783 if (!Subtarget.is64Bit()) \
4784 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4785 break; \
4786 }
4787 CSR_CASE(csrrd_d);
4788 CSR_CASE(csrwr_d);
4789 CSR_CASE(csrxchg_d);
4790 CSR_CASE(iocsrrd_d);
4791#undef CSR_CASE
4792 case Intrinsic::loongarch_csrrd_w: {
4793 unsigned Imm = Op2->getAsZExtVal();
4794 if (!isUInt<14>(Imm)) {
4795 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4796 return;
4797 }
4798 SDValue CSRRDResults =
4799 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4800 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4801 Results.push_back(
4802 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4803 Results.push_back(CSRRDResults.getValue(1));
4804 break;
4805 }
4806 case Intrinsic::loongarch_csrwr_w: {
4807 unsigned Imm = N->getConstantOperandVal(3);
4808 if (!isUInt<14>(Imm)) {
4809 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4810 return;
4811 }
4812 SDValue CSRWRResults =
4813 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4814 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4815 DAG.getConstant(Imm, DL, GRLenVT)});
4816 Results.push_back(
4817 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4818 Results.push_back(CSRWRResults.getValue(1));
4819 break;
4820 }
4821 case Intrinsic::loongarch_csrxchg_w: {
4822 unsigned Imm = N->getConstantOperandVal(4);
4823 if (!isUInt<14>(Imm)) {
4824 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4825 return;
4826 }
4827 SDValue CSRXCHGResults = DAG.getNode(
4828 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4829 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4830 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4831 DAG.getConstant(Imm, DL, GRLenVT)});
4832 Results.push_back(
4833 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4834 Results.push_back(CSRXCHGResults.getValue(1));
4835 break;
4836 }
4837#define IOCSRRD_CASE(NAME, NODE) \
4838 case Intrinsic::loongarch_##NAME: { \
4839 SDValue IOCSRRDResults = \
4840 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4841 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4842 Results.push_back( \
4843 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4844 Results.push_back(IOCSRRDResults.getValue(1)); \
4845 break; \
4846 }
4847 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4848 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4849 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4850#undef IOCSRRD_CASE
4851 case Intrinsic::loongarch_cpucfg: {
4852 SDValue CPUCFGResults =
4853 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4854 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4855 Results.push_back(
4856 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4857 Results.push_back(CPUCFGResults.getValue(1));
4858 break;
4859 }
4860 case Intrinsic::loongarch_lddir_d: {
4861 if (!Subtarget.is64Bit()) {
4862 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4863 return;
4864 }
4865 break;
4866 }
4867 }
4868 break;
4869 }
4870 case ISD::READ_REGISTER: {
4871 if (Subtarget.is64Bit())
4872 DAG.getContext()->emitError(
4873 "On LA64, only 64-bit registers can be read.");
4874 else
4875 DAG.getContext()->emitError(
4876 "On LA32, only 32-bit registers can be read.");
4877 Results.push_back(DAG.getUNDEF(VT));
4878 Results.push_back(N->getOperand(0));
4879 break;
4880 }
4881 case ISD::INTRINSIC_WO_CHAIN: {
4882 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4883 break;
4884 }
4885 case ISD::LROUND: {
4886 SDValue Op0 = N->getOperand(0);
4887 EVT OpVT = Op0.getValueType();
4888 RTLIB::Libcall LC =
4889 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4890 MakeLibCallOptions CallOptions;
4891 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4892 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4893 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4894 Results.push_back(Result);
4895 break;
4896 }
4897 case ISD::ATOMIC_CMP_SWAP: {
4898 replaceCMP_XCHG_128Results(N, Results, DAG);
4899 break;
4900 }
4901 case ISD::TRUNCATE: {
4902 MVT VT = N->getSimpleValueType(0);
4903 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4904 return;
4905
4906 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4907 SDValue In = N->getOperand(0);
4908 EVT InVT = In.getValueType();
4909 EVT InEltVT = InVT.getVectorElementType();
4910 EVT EltVT = VT.getVectorElementType();
4911 unsigned MinElts = VT.getVectorNumElements();
4912 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4913 unsigned InBits = InVT.getSizeInBits();
4914
4915 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4916 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4917 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4918 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4919 for (unsigned I = 0; I < MinElts; ++I)
4920 TruncMask[I] = Scale * I;
4921
4922 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4923 MVT SVT = In.getSimpleValueType().getScalarType();
4924 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4925 SDValue WidenIn =
4926 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4927 DAG.getVectorIdxConstant(0, DL));
4928 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4929 "Illegal vector type in truncation");
4930 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4931 Results.push_back(
4932 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4933 return;
4934 }
4935 }
4936
4937 break;
4938 }
4939 }
4940}
4941
4942static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4943 TargetLowering::DAGCombinerInfo &DCI,
4944 const LoongArchSubtarget &Subtarget) {
4945 if (DCI.isBeforeLegalizeOps())
4946 return SDValue();
4947
4948 SDValue FirstOperand = N->getOperand(0);
4949 SDValue SecondOperand = N->getOperand(1);
4950 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4951 EVT ValTy = N->getValueType(0);
4952 SDLoc DL(N);
4953 uint64_t lsb, msb;
4954 unsigned SMIdx, SMLen;
4955 ConstantSDNode *CN;
4956 SDValue NewOperand;
4957 MVT GRLenVT = Subtarget.getGRLenVT();
4958
4959 // BSTRPICK requires the 32S feature.
4960 if (!Subtarget.has32S())
4961 return SDValue();
4962
4963 // Op's second operand must be a shifted mask.
4964 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4965 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4966 return SDValue();
4967
4968 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4969 // Pattern match BSTRPICK.
4970 // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
4971 // => BSTRPICK $dst, $src, msb, lsb
4972 // where msb = lsb + len - 1
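    // Illustrative example (values chosen for exposition): with lsb = 8 and
    // len = 8,
    //   $dst = and (srl $src, 8), 0xff
    // becomes BSTRPICK $dst, $src, 15, 8, since msb = 8 + 8 - 1 = 15.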
4973
4974 // The second operand of the shift must be an immediate.
4975 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4976 return SDValue();
4977
4978 lsb = CN->getZExtValue();
4979
4980 // Return if the shifted mask does not start at bit 0 or the sum of its
4981 // length and lsb exceeds the word's size.
4982 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4983 return SDValue();
4984
4985 NewOperand = FirstOperand.getOperand(0);
4986 } else {
4987 // Pattern match BSTRPICK.
4988 // $dst = and $src, (2**len - 1), if len > 12
4989 // => BSTRPICK $dst, $src, msb, lsb
4990 // where lsb = 0 and msb = len - 1
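    // Illustrative example: and $src, 0xffff (len = 16 > 12) becomes
    // BSTRPICK $dst, $src, 15, 0.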
4991
4992 // If the mask is <= 0xfff, andi can be used instead.
4993 if (CN->getZExtValue() <= 0xfff)
4994 return SDValue();
4995
4996 // Return if the msb (SMIdx + SMLen - 1) of the mask exceeds the word size.
4997 if (SMIdx + SMLen > ValTy.getSizeInBits())
4998 return SDValue();
4999
5000 if (SMIdx > 0) {
5001 // Omit if the constant has more than 2 uses. This is a conservative
5002 // decision. Whether it is a win depends on the HW microarchitecture.
5003 // However, it should always be better for 1 and 2 uses.
5004 if (CN->use_size() > 2)
5005 return SDValue();
5006 // Return if the constant can be composed by a single LU12I.W.
5007 if ((CN->getZExtValue() & 0xfff) == 0)
5008 return SDValue();
5009 // Return if the constant can be composed by a single ADDI with
5010 // the zero register.
5011 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5012 return SDValue();
5013 }
5014
5015 lsb = SMIdx;
5016 NewOperand = FirstOperand;
5017 }
5018
5019 msb = lsb + SMLen - 1;
5020 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5021 DAG.getConstant(msb, DL, GRLenVT),
5022 DAG.getConstant(lsb, DL, GRLenVT));
5023 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5024 return NR0;
5025 // Try to optimize to
5026 // bstrpick $Rd, $Rs, msb, lsb
5027 // slli $Rd, $Rd, lsb
5028 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5029 DAG.getConstant(lsb, DL, GRLenVT));
5030}
5031
5034 const LoongArchSubtarget &Subtarget) {
5035 // BSTRPICK requires the 32S feature.
5036 if (!Subtarget.has32S())
5037 return SDValue();
5038
5039 if (DCI.isBeforeLegalizeOps())
5040 return SDValue();
5041
5042 // $dst = srl (and $src, Mask), Shamt
5043 // =>
5044 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5045 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5046 //
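  // Illustrative example: with Mask = 0xff00 (MaskIdx = 8, MaskLen = 8) and
  // Shamt = 10, $dst = srl (and $src, 0xff00), 10 becomes
  // BSTRPICK $dst, $src, 15, 10.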
5047
5048 SDValue FirstOperand = N->getOperand(0);
5049 ConstantSDNode *CN;
5050 EVT ValTy = N->getValueType(0);
5051 SDLoc DL(N);
5052 MVT GRLenVT = Subtarget.getGRLenVT();
5053 unsigned MaskIdx, MaskLen;
5054 uint64_t Shamt;
5055
5056 // The first operand must be an AND and the second operand of the AND must be
5057 // a shifted mask.
5058 if (FirstOperand.getOpcode() != ISD::AND ||
5059 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5060 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5061 return SDValue();
5062
5063 // The second operand (shift amount) must be an immediate.
5064 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5065 return SDValue();
5066
5067 Shamt = CN->getZExtValue();
5068 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5069 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5070 FirstOperand->getOperand(0),
5071 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5072 DAG.getConstant(Shamt, DL, GRLenVT));
5073
5074 return SDValue();
5075}
5076
5077// Helper to peek through bitops/trunc/setcc to determine the size of the source vector.
5078// Allows BITCASTCombine to determine what size of vector generated an <X x i1>.
5079static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5080 unsigned Depth) {
5081 // Limit recursion.
5082 if (Depth >= SelectionDAG::MaxRecursionDepth)
5083 return false;
5084 switch (Src.getOpcode()) {
5085 case ISD::SETCC:
5086 case ISD::TRUNCATE:
5087 return Src.getOperand(0).getValueSizeInBits() == Size;
5088 case ISD::FREEZE:
5089 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5090 case ISD::AND:
5091 case ISD::XOR:
5092 case ISD::OR:
5093 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5094 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5095 case ISD::SELECT:
5096 case ISD::VSELECT:
5097 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5098 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5099 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5100 case ISD::BUILD_VECTOR:
5101 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5102 ISD::isBuildVectorAllOnes(Src.getNode());
5103 }
5104 return false;
5105}
5106
5107// Helper to push sign extension of vXi1 SETCC result through bitops.
5108static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5109 SDValue Src, const SDLoc &DL) {
5110 switch (Src.getOpcode()) {
5111 case ISD::SETCC:
5112 case ISD::FREEZE:
5113 case ISD::TRUNCATE:
5114 case ISD::BUILD_VECTOR:
5115 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5116 case ISD::AND:
5117 case ISD::XOR:
5118 case ISD::OR:
5119 return DAG.getNode(
5120 Src.getOpcode(), DL, SExtVT,
5121 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5122 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5123 case ISD::SELECT:
5124 case ISD::VSELECT:
5125 return DAG.getSelect(
5126 DL, SExtVT, Src.getOperand(0),
5127 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5128 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5129 }
5130 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5131}
5132
5133static SDValue
5134performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5135 TargetLowering::DAGCombinerInfo &DCI,
5136 const LoongArchSubtarget &Subtarget) {
5137 SDLoc DL(N);
5138 EVT VT = N->getValueType(0);
5139 SDValue Src = N->getOperand(0);
5140 EVT SrcVT = Src.getValueType();
5141
5142 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5143 return SDValue();
5144
5145 bool UseLASX;
5146 unsigned Opc = ISD::DELETED_NODE;
5147 EVT CmpVT = Src.getOperand(0).getValueType();
5148 EVT EltVT = CmpVT.getVectorElementType();
5149
5150 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5151 UseLASX = false;
5152 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5153 CmpVT.getSizeInBits() == 256)
5154 UseLASX = true;
5155 else
5156 return SDValue();
5157
5158 SDValue SrcN1 = Src.getOperand(1);
5159 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5160 default:
5161 break;
5162 case ISD::SETEQ:
5163 // x == 0 => not (vmsknez.b x)
5164 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5166 break;
5167 case ISD::SETGT:
5168 // x > -1 => vmskgez.b x
5169 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5171 break;
5172 case ISD::SETGE:
5173 // x >= 0 => vmskgez.b x
5174 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5176 break;
5177 case ISD::SETLT:
5178 // x < 0 => vmskltz.{b,h,w,d} x
5179 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5180 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5181 EltVT == MVT::i64))
5183 break;
5184 case ISD::SETLE:
5185 // x <= -1 => vmskltz.{b,h,w,d} x
5186 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5187 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5188 EltVT == MVT::i64))
5190 break;
5191 case ISD::SETNE:
5192 // x != 0 => vmsknez.b x
5193 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5195 break;
5196 }
5197
5198 if (Opc == ISD::DELETED_NODE)
5199 return SDValue();
5200
5201 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5203 V = DAG.getZExtOrTrunc(V, DL, T);
5204 return DAG.getBitcast(VT, V);
5205}
5206
5209 const LoongArchSubtarget &Subtarget) {
5210 SDLoc DL(N);
5211 EVT VT = N->getValueType(0);
5212 SDValue Src = N->getOperand(0);
5213 EVT SrcVT = Src.getValueType();
5214 MVT GRLenVT = Subtarget.getGRLenVT();
5215
5216 if (!DCI.isBeforeLegalizeOps())
5217 return SDValue();
5218
5219 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5220 return SDValue();
5221
5222 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5223 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5224 if (Res)
5225 return Res;
5226
5227 // Generate vXi1 using [X]VMSKLTZ
5228 MVT SExtVT;
5229 unsigned Opc;
5230 bool UseLASX = false;
5231 bool PropagateSExt = false;
5232
5233 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5234 EVT CmpVT = Src.getOperand(0).getValueType();
5235 if (CmpVT.getSizeInBits() > 256)
5236 return SDValue();
5237 }
5238
5239 switch (SrcVT.getSimpleVT().SimpleTy) {
5240 default:
5241 return SDValue();
5242 case MVT::v2i1:
5243 SExtVT = MVT::v2i64;
5244 break;
5245 case MVT::v4i1:
5246 SExtVT = MVT::v4i32;
5247 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5248 SExtVT = MVT::v4i64;
5249 UseLASX = true;
5250 PropagateSExt = true;
5251 }
5252 break;
5253 case MVT::v8i1:
5254 SExtVT = MVT::v8i16;
5255 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5256 SExtVT = MVT::v8i32;
5257 UseLASX = true;
5258 PropagateSExt = true;
5259 }
5260 break;
5261 case MVT::v16i1:
5262 SExtVT = MVT::v16i8;
5263 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5264 SExtVT = MVT::v16i16;
5265 UseLASX = true;
5266 PropagateSExt = true;
5267 }
5268 break;
5269 case MVT::v32i1:
5270 SExtVT = MVT::v32i8;
5271 UseLASX = true;
5272 break;
5273 };
5274 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5275 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5276
5277 SDValue V;
5278 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5279 if (Src.getSimpleValueType() == MVT::v32i8) {
5280 SDValue Lo, Hi;
5281 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5282 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5283 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5284 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5285 DAG.getConstant(16, DL, MVT::i8));
5286 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5287 } else if (UseLASX) {
5288 return SDValue();
5289 }
5290 }
5291
5292 if (!V) {
5294 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5295 }
5296
5298 V = DAG.getZExtOrTrunc(V, DL, T);
5299 return DAG.getBitcast(VT, V);
5300}
5301
5304 const LoongArchSubtarget &Subtarget) {
5305 MVT GRLenVT = Subtarget.getGRLenVT();
5306 EVT ValTy = N->getValueType(0);
5307 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5308 ConstantSDNode *CN0, *CN1;
5309 SDLoc DL(N);
5310 unsigned ValBits = ValTy.getSizeInBits();
5311 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5312 unsigned Shamt;
5313 bool SwapAndRetried = false;
5314
5315 // BSTRPICK requires the 32S feature.
5316 if (!Subtarget.has32S())
5317 return SDValue();
5318
5319 if (DCI.isBeforeLegalizeOps())
5320 return SDValue();
5321
5322 if (ValBits != 32 && ValBits != 64)
5323 return SDValue();
5324
5325Retry:
5326 // 1st pattern to match BSTRINS:
5327 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5328 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5329 // =>
5330 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
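  // Illustrative example (32-bit, lsb = 8, size = 8):
  //   R = or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00)
  // becomes BSTRINS X, Y, 15, 8.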
5331 if (N0.getOpcode() == ISD::AND &&
5332 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5333 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5334 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5335 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5336 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5337 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5338 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5339 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5340 (MaskIdx0 + MaskLen0 <= ValBits)) {
5341 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5342 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5343 N1.getOperand(0).getOperand(0),
5344 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5345 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5346 }
5347
5348 // 2nd pattern to match BSTRINS:
5349 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5350 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5351 // =>
5352 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
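  // Illustrative example (32-bit, lsb = 8, size = 8):
  //   R = or (and X, 0xffff00ff), (shl (and Y, 0xff), 8)
  // becomes BSTRINS X, Y, 15, 8.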
5353 if (N0.getOpcode() == ISD::AND &&
5354 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5355 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5356 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5357 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5358 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5359 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5360 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5361 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5362 (MaskIdx0 + MaskLen0 <= ValBits)) {
5363 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5364 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5365 N1.getOperand(0).getOperand(0),
5366 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5367 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5368 }
5369
5370 // 3rd pattern to match BSTRINS:
5371 // R = or (and X, mask0), (and Y, mask1)
5372 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5373 // =>
5374 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5375 // where msb = lsb + size - 1
5376 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5377 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5378 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5379 (MaskIdx0 + MaskLen0 <= 64) &&
5380 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5381 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5382 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5383 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5384 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5385 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5386 DAG.getConstant(ValBits == 32
5387 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5388 : (MaskIdx0 + MaskLen0 - 1),
5389 DL, GRLenVT),
5390 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5391 }
5392
5393 // 4th pattern to match BSTRINS:
5394 // R = or (and X, mask), (shl Y, shamt)
5395 // where mask = (2**shamt - 1)
5396 // =>
5397 // R = BSTRINS X, Y, ValBits - 1, shamt
5398 // where ValBits = 32 or 64
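  // Illustrative example (ValBits = 32, shamt = 12):
  //   R = or (and X, 0xfff), (shl Y, 12)
  // becomes BSTRINS X, Y, 31, 12.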
5399 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5400 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5401 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5402 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5403 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5404 (MaskIdx0 + MaskLen0 <= ValBits)) {
5405 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5406 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5407 N1.getOperand(0),
5408 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5409 DAG.getConstant(Shamt, DL, GRLenVT));
5410 }
5411
5412 // 5th pattern to match BSTRINS:
5413 // R = or (and X, mask), const
5414 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5415 // =>
5416 // R = BSTRINS X, (const >> lsb), msb, lsb
5417 // where msb = lsb + size - 1
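  // Illustrative example (32-bit, lsb = 8, size = 8):
  //   R = or (and X, 0xffff00ff), 0x1200
  // becomes BSTRINS X, 0x12, 15, 8.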
5418 if (N0.getOpcode() == ISD::AND &&
5419 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5420 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5421 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5422 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5423 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5424 return DAG.getNode(
5425 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5426 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5427 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5428 : (MaskIdx0 + MaskLen0 - 1),
5429 DL, GRLenVT),
5430 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5431 }
5432
5433 // 6th pattern.
5434 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5435 // by the incoming bits are known to be zero.
5436 // =>
5437 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5438 //
5439 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5440 // pattern is more common than the 1st. So we put the 1st before the 6th in
5441 // order to match as many nodes as possible.
5442 ConstantSDNode *CNMask, *CNShamt;
5443 unsigned MaskIdx, MaskLen;
5444 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5445 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5446 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5447 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5448 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5449 Shamt = CNShamt->getZExtValue();
5450 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5451 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5452 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5453 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5454 N1.getOperand(0).getOperand(0),
5455 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5456 DAG.getConstant(Shamt, DL, GRLenVT));
5457 }
5458 }
5459
5460 // 7th pattern.
5461 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5462 // overwritten by the incoming bits are known to be zero.
5463 // =>
5464 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5465 //
5466 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5467 // before the 7th in order to match as many nodes as possible.
5468 if (N1.getOpcode() == ISD::AND &&
5469 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5470 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5471 N1.getOperand(0).getOpcode() == ISD::SHL &&
5472 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5473 CNShamt->getZExtValue() == MaskIdx) {
5474 APInt ShMask(ValBits, CNMask->getZExtValue());
5475 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5476 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5477 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5478 N1.getOperand(0).getOperand(0),
5479 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5480 DAG.getConstant(MaskIdx, DL, GRLenVT));
5481 }
5482 }
5483
5484 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5485 if (!SwapAndRetried) {
5486 std::swap(N0, N1);
5487 SwapAndRetried = true;
5488 goto Retry;
5489 }
5490
5491 SwapAndRetried = false;
5492Retry2:
5493 // 8th pattern.
5494 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5495 // the incoming bits are known to be zero.
5496 // =>
5497 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5498 //
5499 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5500 // we put it here in order to match as many nodes as possible or generate
5501 // fewer instructions.
5502 if (N1.getOpcode() == ISD::AND &&
5503 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5504 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5505 APInt ShMask(ValBits, CNMask->getZExtValue());
5506 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5507 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5508 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5509 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5510 N1->getOperand(0),
5511 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5512 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5513 DAG.getConstant(MaskIdx, DL, GRLenVT));
5514 }
5515 }
5516 // Swap N0/N1 and retry.
5517 if (!SwapAndRetried) {
5518 std::swap(N0, N1);
5519 SwapAndRetried = true;
5520 goto Retry2;
5521 }
5522
5523 return SDValue();
5524}
5525
5526static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5527 ExtType = ISD::NON_EXTLOAD;
5528
5529 switch (V.getNode()->getOpcode()) {
5530 case ISD::LOAD: {
5531 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5532 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5533 (LoadNode->getMemoryVT() == MVT::i16)) {
5534 ExtType = LoadNode->getExtensionType();
5535 return true;
5536 }
5537 return false;
5538 }
5539 case ISD::AssertSext: {
5540 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5541 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5542 ExtType = ISD::SEXTLOAD;
5543 return true;
5544 }
5545 return false;
5546 }
5547 case ISD::AssertZext: {
5548 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5549 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5550 ExtType = ISD::ZEXTLOAD;
5551 return true;
5552 }
5553 return false;
5554 }
5555 default:
5556 return false;
5557 }
5558
5559 return false;
5560}
5561
5562// Eliminate redundant truncation and zero-extension nodes.
5563// * Case 1:
5564// +------------+ +------------+ +------------+
5565// | Input1 | | Input2 | | CC |
5566// +------------+ +------------+ +------------+
5567// | | |
5568// V V +----+
5569// +------------+ +------------+ |
5570// | TRUNCATE | | TRUNCATE | |
5571// +------------+ +------------+ |
5572// | | |
5573// V V |
5574// +------------+ +------------+ |
5575// | ZERO_EXT | | ZERO_EXT | |
5576// +------------+ +------------+ |
5577// | | |
5578// | +-------------+ |
5579// V V | |
5580// +----------------+ | |
5581// | AND | | |
5582// +----------------+ | |
5583// | | |
5584// +---------------+ | |
5585// | | |
5586// V V V
5587// +-------------+
5588// | CMP |
5589// +-------------+
5590// * Case 2:
5591// +------------+ +------------+ +-------------+ +------------+ +------------+
5592// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5593// +------------+ +------------+ +-------------+ +------------+ +------------+
5594// | | | | |
5595// V | | | |
5596// +------------+ | | | |
5597// | XOR |<---------------------+ | |
5598// +------------+ | | |
5599// | | | |
5600// V V +---------------+ |
5601// +------------+ +------------+ | |
5602// | TRUNCATE | | TRUNCATE | | +-------------------------+
5603// +------------+ +------------+ | |
5604// | | | |
5605// V V | |
5606// +------------+ +------------+ | |
5607// | ZERO_EXT | | ZERO_EXT | | |
5608// +------------+ +------------+ | |
5609// | | | |
5610// V V | |
5611// +----------------+ | |
5612// | AND | | |
5613// +----------------+ | |
5614// | | |
5615// +---------------+ | |
5616// | | |
5617// V V V
5618// +-------------+
5619// | CMP |
5620// +-------------+
5623 const LoongArchSubtarget &Subtarget) {
5624 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5625
5626 SDNode *AndNode = N->getOperand(0).getNode();
5627 if (AndNode->getOpcode() != ISD::AND)
5628 return SDValue();
5629
5630 SDValue AndInputValue2 = AndNode->getOperand(1);
5631 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5632 return SDValue();
5633
5634 SDValue CmpInputValue = N->getOperand(1);
5635 SDValue AndInputValue1 = AndNode->getOperand(0);
5636 if (AndInputValue1.getOpcode() == ISD::XOR) {
5637 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5638 return SDValue();
5639 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5640 if (!CN || CN->getSExtValue() != -1)
5641 return SDValue();
5642 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5643 if (!CN || CN->getSExtValue() != 0)
5644 return SDValue();
5645 AndInputValue1 = AndInputValue1.getOperand(0);
5646 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5647 return SDValue();
5648 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5649 if (AndInputValue2 != CmpInputValue)
5650 return SDValue();
5651 } else {
5652 return SDValue();
5653 }
5654
5655 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5656 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5657 return SDValue();
5658
5659 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5660 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5661 return SDValue();
5662
5663 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5664 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5665 ISD::LoadExtType ExtType1;
5666 ISD::LoadExtType ExtType2;
5667
5668 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5669 !checkValueWidth(TruncInputValue2, ExtType2))
5670 return SDValue();
5671
5672 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5673 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5674 return SDValue();
5675
5676 if ((ExtType2 != ISD::ZEXTLOAD) &&
5677 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5678 return SDValue();
5679
5680 // These truncation and zero-extension nodes are not necessary; remove them.
5681 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5682 TruncInputValue1, TruncInputValue2);
5683 SDValue NewSetCC =
5684 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5685 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5686 return SDValue(N, 0);
5687}
5688
5689// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
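// Reversing the bytes of a 32-bit word and then reversing all of its bits is
// equivalent to reversing the bits within each byte while keeping the bytes in
// place, which is exactly what bitrev.4b computes.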
5692 const LoongArchSubtarget &Subtarget) {
5693 if (DCI.isBeforeLegalizeOps())
5694 return SDValue();
5695
5696 SDValue Src = N->getOperand(0);
5697 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5698 return SDValue();
5699
5700 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5701 Src.getOperand(0));
5702}
5703
5704// Perform common combines for BR_CC and SELECT_CC conditions.
5705static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5706 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5707 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5708
5709 // Since an arithmetic right shift always preserves the sign bit,
5710 // the shift can be omitted here.
5711 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5712 // setge (sra X, N), 0 -> setge X, 0
5713 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5714 LHS.getOpcode() == ISD::SRA) {
5715 LHS = LHS.getOperand(0);
5716 return true;
5717 }
5718
5719 if (!ISD::isIntEqualitySetCC(CCVal))
5720 return false;
5721
5722 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5723 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5724 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5725 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5726 // If we're looking for eq 0 instead of ne 0, we need to invert the
5727 // condition.
5728 bool Invert = CCVal == ISD::SETEQ;
5729 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5730 if (Invert)
5731 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5732
5733 RHS = LHS.getOperand(1);
5734 LHS = LHS.getOperand(0);
5735 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5736
5737 CC = DAG.getCondCode(CCVal);
5738 return true;
5739 }
5740
5741 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
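  // Illustrative example (GRLen = 64, C = 3):
  //   ((srl (and X, 8), 3), 0, ne) -> ((shl X, 60), 0, lt)
  // i.e. testing bit 3 of X becomes a sign-bit test after shifting that bit
  // up to bit 63.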
5742 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5743 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5744 SDValue LHS0 = LHS.getOperand(0);
5745 if (LHS0.getOpcode() == ISD::AND &&
5746 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5747 uint64_t Mask = LHS0.getConstantOperandVal(1);
5748 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5749 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5750 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5751 CC = DAG.getCondCode(CCVal);
5752
5753 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5754 LHS = LHS0.getOperand(0);
5755 if (ShAmt != 0)
5756 LHS =
5757 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5758 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5759 return true;
5760 }
5761 }
5762 }
5763
5764 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5765 // This can occur when legalizing some floating point comparisons.
5766 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5767 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5768 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5769 CC = DAG.getCondCode(CCVal);
5770 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5771 return true;
5772 }
5773
5774 return false;
5775}
5776
5779 const LoongArchSubtarget &Subtarget) {
5780 SDValue LHS = N->getOperand(1);
5781 SDValue RHS = N->getOperand(2);
5782 SDValue CC = N->getOperand(3);
5783 SDLoc DL(N);
5784
5785 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5786 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5787 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5788
5789 return SDValue();
5790}
5791
5794 const LoongArchSubtarget &Subtarget) {
5795 // Transform SELECT_CC into simpler forms when possible.
5796 SDValue LHS = N->getOperand(0);
5797 SDValue RHS = N->getOperand(1);
5798 SDValue CC = N->getOperand(2);
5799 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5800 SDValue TrueV = N->getOperand(3);
5801 SDValue FalseV = N->getOperand(4);
5802 SDLoc DL(N);
5803 EVT VT = N->getValueType(0);
5804
5805 // If the True and False values are the same, we don't need a select_cc.
5806 if (TrueV == FalseV)
5807 return TrueV;
5808
5809 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5810 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
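  // Illustrative example (GRLEN = 64, y = 5, z = 3):
  //   sra = x >> 63         ; all ones if x < 0, zero otherwise
  //   and = sra & (5 - 3)   ; 2 if x < 0, 0 otherwise
  //   res = and + 3         ; 5 if x < 0, 3 otherwise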
5811 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5812 isNullConstant(RHS) &&
5813 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5814 if (CCVal == ISD::CondCode::SETGE)
5815 std::swap(TrueV, FalseV);
5816
5817 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5818 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5819 // Only handle simm12; if a value is not in this range, it has to be
5820 // materialized into a register anyway.
5821 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5822 isInt<12>(TrueSImm - FalseSImm)) {
5823 SDValue SRA =
5824 DAG.getNode(ISD::SRA, DL, VT, LHS,
5825 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5826 SDValue AND =
5827 DAG.getNode(ISD::AND, DL, VT, SRA,
5828 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5829 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5830 }
5831
5832 if (CCVal == ISD::CondCode::SETGE)
5833 std::swap(TrueV, FalseV);
5834 }
5835
5836 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5837 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5838 {LHS, RHS, CC, TrueV, FalseV});
5839
5840 return SDValue();
5841}
5842
5843template <unsigned N>
5844static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5845 SelectionDAG &DAG,
5846 const LoongArchSubtarget &Subtarget,
5847 bool IsSigned = false) {
5848 SDLoc DL(Node);
5849 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5850 // Check the ImmArg.
5851 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5852 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5853 DAG.getContext()->emitError(Node->getOperationName(0) +
5854 ": argument out of range.");
5855 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5856 }
5857 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5858}
5859
5860template <unsigned N>
5861static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5862 SelectionDAG &DAG, bool IsSigned = false) {
5863 SDLoc DL(Node);
5864 EVT ResTy = Node->getValueType(0);
5865 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5866
5867 // Check the ImmArg.
5868 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5869 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5870 DAG.getContext()->emitError(Node->getOperationName(0) +
5871 ": argument out of range.");
5872 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5873 }
5874 return DAG.getConstant(
5875 APInt(ResTy.getScalarSizeInBits(),
5876 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5877 DL, ResTy);
5878}
5879
5880static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5881 SDLoc DL(Node);
5882 EVT ResTy = Node->getValueType(0);
5883 SDValue Vec = Node->getOperand(2);
5884 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5885 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5886}
5887
5888static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5889 SDLoc DL(Node);
5890 EVT ResTy = Node->getValueType(0);
5891 SDValue One = DAG.getConstant(1, DL, ResTy);
5892 SDValue Bit =
5893 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5894
5895 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5896 DAG.getNOT(DL, Bit, ResTy));
5897}
5898
5899template <unsigned N>
5900static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5901 SDLoc DL(Node);
5902 EVT ResTy = Node->getValueType(0);
5903 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5904 // Check the unsigned ImmArg.
5905 if (!isUInt<N>(CImm->getZExtValue())) {
5906 DAG.getContext()->emitError(Node->getOperationName(0) +
5907 ": argument out of range.");
5908 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5909 }
5910
5911 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5912 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5913
5914 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5915}
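// For instance, [x]vbitclri.b with an immediate of 3 is lowered above to an
// AND with a splat of ~(1 << 3) = 0xf7, clearing bit 3 in every byte lane.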
5916
5917template <unsigned N>
5918static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5919 SDLoc DL(Node);
5920 EVT ResTy = Node->getValueType(0);
5921 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5922 // Check the unsigned ImmArg.
5923 if (!isUInt<N>(CImm->getZExtValue())) {
5924 DAG.getContext()->emitError(Node->getOperationName(0) +
5925 ": argument out of range.");
5926 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5927 }
5928
5929 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5930 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5931 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5932}
5933
5934template <unsigned N>
5935static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5936 SDLoc DL(Node);
5937 EVT ResTy = Node->getValueType(0);
5938 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5939 // Check the unsigned ImmArg.
5940 if (!isUInt<N>(CImm->getZExtValue())) {
5941 DAG.getContext()->emitError(Node->getOperationName(0) +
5942 ": argument out of range.");
5943 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5944 }
5945
5946 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5947 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5948 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5949}
5950
5951template <unsigned W>
5953 unsigned ResOp) {
5954 unsigned Imm = N->getConstantOperandVal(2);
5955 if (!isUInt<W>(Imm)) {
5956 const StringRef ErrorMsg = "argument out of range";
5957 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
5958 return DAG.getUNDEF(N->getValueType(0));
5959 }
5960 SDLoc DL(N);
5961 SDValue Vec = N->getOperand(1);
5962 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
5963 SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
5964 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
5965}
5966
5967static SDValue
5970 const LoongArchSubtarget &Subtarget) {
5971 SDLoc DL(N);
5972 switch (N->getConstantOperandVal(0)) {
5973 default:
5974 break;
5975 case Intrinsic::loongarch_lsx_vadd_b:
5976 case Intrinsic::loongarch_lsx_vadd_h:
5977 case Intrinsic::loongarch_lsx_vadd_w:
5978 case Intrinsic::loongarch_lsx_vadd_d:
5979 case Intrinsic::loongarch_lasx_xvadd_b:
5980 case Intrinsic::loongarch_lasx_xvadd_h:
5981 case Intrinsic::loongarch_lasx_xvadd_w:
5982 case Intrinsic::loongarch_lasx_xvadd_d:
5983 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5984 N->getOperand(2));
5985 case Intrinsic::loongarch_lsx_vaddi_bu:
5986 case Intrinsic::loongarch_lsx_vaddi_hu:
5987 case Intrinsic::loongarch_lsx_vaddi_wu:
5988 case Intrinsic::loongarch_lsx_vaddi_du:
5989 case Intrinsic::loongarch_lasx_xvaddi_bu:
5990 case Intrinsic::loongarch_lasx_xvaddi_hu:
5991 case Intrinsic::loongarch_lasx_xvaddi_wu:
5992 case Intrinsic::loongarch_lasx_xvaddi_du:
5993 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5994 lowerVectorSplatImm<5>(N, 2, DAG));
5995 case Intrinsic::loongarch_lsx_vsub_b:
5996 case Intrinsic::loongarch_lsx_vsub_h:
5997 case Intrinsic::loongarch_lsx_vsub_w:
5998 case Intrinsic::loongarch_lsx_vsub_d:
5999 case Intrinsic::loongarch_lasx_xvsub_b:
6000 case Intrinsic::loongarch_lasx_xvsub_h:
6001 case Intrinsic::loongarch_lasx_xvsub_w:
6002 case Intrinsic::loongarch_lasx_xvsub_d:
6003 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6004 N->getOperand(2));
6005 case Intrinsic::loongarch_lsx_vsubi_bu:
6006 case Intrinsic::loongarch_lsx_vsubi_hu:
6007 case Intrinsic::loongarch_lsx_vsubi_wu:
6008 case Intrinsic::loongarch_lsx_vsubi_du:
6009 case Intrinsic::loongarch_lasx_xvsubi_bu:
6010 case Intrinsic::loongarch_lasx_xvsubi_hu:
6011 case Intrinsic::loongarch_lasx_xvsubi_wu:
6012 case Intrinsic::loongarch_lasx_xvsubi_du:
6013 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6014 lowerVectorSplatImm<5>(N, 2, DAG));
6015 case Intrinsic::loongarch_lsx_vneg_b:
6016 case Intrinsic::loongarch_lsx_vneg_h:
6017 case Intrinsic::loongarch_lsx_vneg_w:
6018 case Intrinsic::loongarch_lsx_vneg_d:
6019 case Intrinsic::loongarch_lasx_xvneg_b:
6020 case Intrinsic::loongarch_lasx_xvneg_h:
6021 case Intrinsic::loongarch_lasx_xvneg_w:
6022 case Intrinsic::loongarch_lasx_xvneg_d:
6023 return DAG.getNode(
6024 ISD::SUB, DL, N->getValueType(0),
6025 DAG.getConstant(
6026 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6027 /*isSigned=*/true),
6028 SDLoc(N), N->getValueType(0)),
6029 N->getOperand(1));
6030 case Intrinsic::loongarch_lsx_vmax_b:
6031 case Intrinsic::loongarch_lsx_vmax_h:
6032 case Intrinsic::loongarch_lsx_vmax_w:
6033 case Intrinsic::loongarch_lsx_vmax_d:
6034 case Intrinsic::loongarch_lasx_xvmax_b:
6035 case Intrinsic::loongarch_lasx_xvmax_h:
6036 case Intrinsic::loongarch_lasx_xvmax_w:
6037 case Intrinsic::loongarch_lasx_xvmax_d:
6038 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6039 N->getOperand(2));
6040 case Intrinsic::loongarch_lsx_vmax_bu:
6041 case Intrinsic::loongarch_lsx_vmax_hu:
6042 case Intrinsic::loongarch_lsx_vmax_wu:
6043 case Intrinsic::loongarch_lsx_vmax_du:
6044 case Intrinsic::loongarch_lasx_xvmax_bu:
6045 case Intrinsic::loongarch_lasx_xvmax_hu:
6046 case Intrinsic::loongarch_lasx_xvmax_wu:
6047 case Intrinsic::loongarch_lasx_xvmax_du:
6048 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6049 N->getOperand(2));
6050 case Intrinsic::loongarch_lsx_vmaxi_b:
6051 case Intrinsic::loongarch_lsx_vmaxi_h:
6052 case Intrinsic::loongarch_lsx_vmaxi_w:
6053 case Intrinsic::loongarch_lsx_vmaxi_d:
6054 case Intrinsic::loongarch_lasx_xvmaxi_b:
6055 case Intrinsic::loongarch_lasx_xvmaxi_h:
6056 case Intrinsic::loongarch_lasx_xvmaxi_w:
6057 case Intrinsic::loongarch_lasx_xvmaxi_d:
6058 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6059 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6060 case Intrinsic::loongarch_lsx_vmaxi_bu:
6061 case Intrinsic::loongarch_lsx_vmaxi_hu:
6062 case Intrinsic::loongarch_lsx_vmaxi_wu:
6063 case Intrinsic::loongarch_lsx_vmaxi_du:
6064 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6065 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6066 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6067 case Intrinsic::loongarch_lasx_xvmaxi_du:
6068 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6069 lowerVectorSplatImm<5>(N, 2, DAG));
6070 case Intrinsic::loongarch_lsx_vmin_b:
6071 case Intrinsic::loongarch_lsx_vmin_h:
6072 case Intrinsic::loongarch_lsx_vmin_w:
6073 case Intrinsic::loongarch_lsx_vmin_d:
6074 case Intrinsic::loongarch_lasx_xvmin_b:
6075 case Intrinsic::loongarch_lasx_xvmin_h:
6076 case Intrinsic::loongarch_lasx_xvmin_w:
6077 case Intrinsic::loongarch_lasx_xvmin_d:
6078 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6079 N->getOperand(2));
6080 case Intrinsic::loongarch_lsx_vmin_bu:
6081 case Intrinsic::loongarch_lsx_vmin_hu:
6082 case Intrinsic::loongarch_lsx_vmin_wu:
6083 case Intrinsic::loongarch_lsx_vmin_du:
6084 case Intrinsic::loongarch_lasx_xvmin_bu:
6085 case Intrinsic::loongarch_lasx_xvmin_hu:
6086 case Intrinsic::loongarch_lasx_xvmin_wu:
6087 case Intrinsic::loongarch_lasx_xvmin_du:
6088 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6089 N->getOperand(2));
6090 case Intrinsic::loongarch_lsx_vmini_b:
6091 case Intrinsic::loongarch_lsx_vmini_h:
6092 case Intrinsic::loongarch_lsx_vmini_w:
6093 case Intrinsic::loongarch_lsx_vmini_d:
6094 case Intrinsic::loongarch_lasx_xvmini_b:
6095 case Intrinsic::loongarch_lasx_xvmini_h:
6096 case Intrinsic::loongarch_lasx_xvmini_w:
6097 case Intrinsic::loongarch_lasx_xvmini_d:
6098 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6099 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6100 case Intrinsic::loongarch_lsx_vmini_bu:
6101 case Intrinsic::loongarch_lsx_vmini_hu:
6102 case Intrinsic::loongarch_lsx_vmini_wu:
6103 case Intrinsic::loongarch_lsx_vmini_du:
6104 case Intrinsic::loongarch_lasx_xvmini_bu:
6105 case Intrinsic::loongarch_lasx_xvmini_hu:
6106 case Intrinsic::loongarch_lasx_xvmini_wu:
6107 case Intrinsic::loongarch_lasx_xvmini_du:
6108 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6109 lowerVectorSplatImm<5>(N, 2, DAG));
6110 case Intrinsic::loongarch_lsx_vmul_b:
6111 case Intrinsic::loongarch_lsx_vmul_h:
6112 case Intrinsic::loongarch_lsx_vmul_w:
6113 case Intrinsic::loongarch_lsx_vmul_d:
6114 case Intrinsic::loongarch_lasx_xvmul_b:
6115 case Intrinsic::loongarch_lasx_xvmul_h:
6116 case Intrinsic::loongarch_lasx_xvmul_w:
6117 case Intrinsic::loongarch_lasx_xvmul_d:
6118 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6119 N->getOperand(2));
6120 case Intrinsic::loongarch_lsx_vmadd_b:
6121 case Intrinsic::loongarch_lsx_vmadd_h:
6122 case Intrinsic::loongarch_lsx_vmadd_w:
6123 case Intrinsic::loongarch_lsx_vmadd_d:
6124 case Intrinsic::loongarch_lasx_xvmadd_b:
6125 case Intrinsic::loongarch_lasx_xvmadd_h:
6126 case Intrinsic::loongarch_lasx_xvmadd_w:
6127 case Intrinsic::loongarch_lasx_xvmadd_d: {
6128 EVT ResTy = N->getValueType(0);
6129 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6130 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6131 N->getOperand(3)));
6132 }
6133 case Intrinsic::loongarch_lsx_vmsub_b:
6134 case Intrinsic::loongarch_lsx_vmsub_h:
6135 case Intrinsic::loongarch_lsx_vmsub_w:
6136 case Intrinsic::loongarch_lsx_vmsub_d:
6137 case Intrinsic::loongarch_lasx_xvmsub_b:
6138 case Intrinsic::loongarch_lasx_xvmsub_h:
6139 case Intrinsic::loongarch_lasx_xvmsub_w:
6140 case Intrinsic::loongarch_lasx_xvmsub_d: {
6141 EVT ResTy = N->getValueType(0);
6142 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6143 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6144 N->getOperand(3)));
6145 }
6146 case Intrinsic::loongarch_lsx_vdiv_b:
6147 case Intrinsic::loongarch_lsx_vdiv_h:
6148 case Intrinsic::loongarch_lsx_vdiv_w:
6149 case Intrinsic::loongarch_lsx_vdiv_d:
6150 case Intrinsic::loongarch_lasx_xvdiv_b:
6151 case Intrinsic::loongarch_lasx_xvdiv_h:
6152 case Intrinsic::loongarch_lasx_xvdiv_w:
6153 case Intrinsic::loongarch_lasx_xvdiv_d:
6154 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6155 N->getOperand(2));
6156 case Intrinsic::loongarch_lsx_vdiv_bu:
6157 case Intrinsic::loongarch_lsx_vdiv_hu:
6158 case Intrinsic::loongarch_lsx_vdiv_wu:
6159 case Intrinsic::loongarch_lsx_vdiv_du:
6160 case Intrinsic::loongarch_lasx_xvdiv_bu:
6161 case Intrinsic::loongarch_lasx_xvdiv_hu:
6162 case Intrinsic::loongarch_lasx_xvdiv_wu:
6163 case Intrinsic::loongarch_lasx_xvdiv_du:
6164 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6165 N->getOperand(2));
6166 case Intrinsic::loongarch_lsx_vmod_b:
6167 case Intrinsic::loongarch_lsx_vmod_h:
6168 case Intrinsic::loongarch_lsx_vmod_w:
6169 case Intrinsic::loongarch_lsx_vmod_d:
6170 case Intrinsic::loongarch_lasx_xvmod_b:
6171 case Intrinsic::loongarch_lasx_xvmod_h:
6172 case Intrinsic::loongarch_lasx_xvmod_w:
6173 case Intrinsic::loongarch_lasx_xvmod_d:
6174 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6175 N->getOperand(2));
6176 case Intrinsic::loongarch_lsx_vmod_bu:
6177 case Intrinsic::loongarch_lsx_vmod_hu:
6178 case Intrinsic::loongarch_lsx_vmod_wu:
6179 case Intrinsic::loongarch_lsx_vmod_du:
6180 case Intrinsic::loongarch_lasx_xvmod_bu:
6181 case Intrinsic::loongarch_lasx_xvmod_hu:
6182 case Intrinsic::loongarch_lasx_xvmod_wu:
6183 case Intrinsic::loongarch_lasx_xvmod_du:
6184 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6185 N->getOperand(2));
6186 case Intrinsic::loongarch_lsx_vand_v:
6187 case Intrinsic::loongarch_lasx_xvand_v:
6188 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6189 N->getOperand(2));
6190 case Intrinsic::loongarch_lsx_vor_v:
6191 case Intrinsic::loongarch_lasx_xvor_v:
6192 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6193 N->getOperand(2));
6194 case Intrinsic::loongarch_lsx_vxor_v:
6195 case Intrinsic::loongarch_lasx_xvxor_v:
6196 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6197 N->getOperand(2));
6198 case Intrinsic::loongarch_lsx_vnor_v:
6199 case Intrinsic::loongarch_lasx_xvnor_v: {
6200 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6201 N->getOperand(2));
6202 return DAG.getNOT(DL, Res, Res->getValueType(0));
6203 }
6204 case Intrinsic::loongarch_lsx_vandi_b:
6205 case Intrinsic::loongarch_lasx_xvandi_b:
6206 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6207 lowerVectorSplatImm<8>(N, 2, DAG));
6208 case Intrinsic::loongarch_lsx_vori_b:
6209 case Intrinsic::loongarch_lasx_xvori_b:
6210 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6211 lowerVectorSplatImm<8>(N, 2, DAG));
6212 case Intrinsic::loongarch_lsx_vxori_b:
6213 case Intrinsic::loongarch_lasx_xvxori_b:
6214 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6215 lowerVectorSplatImm<8>(N, 2, DAG));
6216 case Intrinsic::loongarch_lsx_vsll_b:
6217 case Intrinsic::loongarch_lsx_vsll_h:
6218 case Intrinsic::loongarch_lsx_vsll_w:
6219 case Intrinsic::loongarch_lsx_vsll_d:
6220 case Intrinsic::loongarch_lasx_xvsll_b:
6221 case Intrinsic::loongarch_lasx_xvsll_h:
6222 case Intrinsic::loongarch_lasx_xvsll_w:
6223 case Intrinsic::loongarch_lasx_xvsll_d:
6224 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6225 truncateVecElts(N, DAG));
6226 case Intrinsic::loongarch_lsx_vslli_b:
6227 case Intrinsic::loongarch_lasx_xvslli_b:
6228 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6229 lowerVectorSplatImm<3>(N, 2, DAG));
6230 case Intrinsic::loongarch_lsx_vslli_h:
6231 case Intrinsic::loongarch_lasx_xvslli_h:
6232 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6233 lowerVectorSplatImm<4>(N, 2, DAG));
6234 case Intrinsic::loongarch_lsx_vslli_w:
6235 case Intrinsic::loongarch_lasx_xvslli_w:
6236 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6237 lowerVectorSplatImm<5>(N, 2, DAG));
6238 case Intrinsic::loongarch_lsx_vslli_d:
6239 case Intrinsic::loongarch_lasx_xvslli_d:
6240 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6241 lowerVectorSplatImm<6>(N, 2, DAG));
6242 case Intrinsic::loongarch_lsx_vsrl_b:
6243 case Intrinsic::loongarch_lsx_vsrl_h:
6244 case Intrinsic::loongarch_lsx_vsrl_w:
6245 case Intrinsic::loongarch_lsx_vsrl_d:
6246 case Intrinsic::loongarch_lasx_xvsrl_b:
6247 case Intrinsic::loongarch_lasx_xvsrl_h:
6248 case Intrinsic::loongarch_lasx_xvsrl_w:
6249 case Intrinsic::loongarch_lasx_xvsrl_d:
6250 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6251 truncateVecElts(N, DAG));
6252 case Intrinsic::loongarch_lsx_vsrli_b:
6253 case Intrinsic::loongarch_lasx_xvsrli_b:
6254 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6255 lowerVectorSplatImm<3>(N, 2, DAG));
6256 case Intrinsic::loongarch_lsx_vsrli_h:
6257 case Intrinsic::loongarch_lasx_xvsrli_h:
6258 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6259 lowerVectorSplatImm<4>(N, 2, DAG));
6260 case Intrinsic::loongarch_lsx_vsrli_w:
6261 case Intrinsic::loongarch_lasx_xvsrli_w:
6262 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6263 lowerVectorSplatImm<5>(N, 2, DAG));
6264 case Intrinsic::loongarch_lsx_vsrli_d:
6265 case Intrinsic::loongarch_lasx_xvsrli_d:
6266 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6267 lowerVectorSplatImm<6>(N, 2, DAG));
6268 case Intrinsic::loongarch_lsx_vsra_b:
6269 case Intrinsic::loongarch_lsx_vsra_h:
6270 case Intrinsic::loongarch_lsx_vsra_w:
6271 case Intrinsic::loongarch_lsx_vsra_d:
6272 case Intrinsic::loongarch_lasx_xvsra_b:
6273 case Intrinsic::loongarch_lasx_xvsra_h:
6274 case Intrinsic::loongarch_lasx_xvsra_w:
6275 case Intrinsic::loongarch_lasx_xvsra_d:
6276 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6277 truncateVecElts(N, DAG));
6278 case Intrinsic::loongarch_lsx_vsrai_b:
6279 case Intrinsic::loongarch_lasx_xvsrai_b:
6280 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6281 lowerVectorSplatImm<3>(N, 2, DAG));
6282 case Intrinsic::loongarch_lsx_vsrai_h:
6283 case Intrinsic::loongarch_lasx_xvsrai_h:
6284 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6285 lowerVectorSplatImm<4>(N, 2, DAG));
6286 case Intrinsic::loongarch_lsx_vsrai_w:
6287 case Intrinsic::loongarch_lasx_xvsrai_w:
6288 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6289 lowerVectorSplatImm<5>(N, 2, DAG));
6290 case Intrinsic::loongarch_lsx_vsrai_d:
6291 case Intrinsic::loongarch_lasx_xvsrai_d:
6292 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6293 lowerVectorSplatImm<6>(N, 2, DAG));
6294 case Intrinsic::loongarch_lsx_vclz_b:
6295 case Intrinsic::loongarch_lsx_vclz_h:
6296 case Intrinsic::loongarch_lsx_vclz_w:
6297 case Intrinsic::loongarch_lsx_vclz_d:
6298 case Intrinsic::loongarch_lasx_xvclz_b:
6299 case Intrinsic::loongarch_lasx_xvclz_h:
6300 case Intrinsic::loongarch_lasx_xvclz_w:
6301 case Intrinsic::loongarch_lasx_xvclz_d:
6302 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6303 case Intrinsic::loongarch_lsx_vpcnt_b:
6304 case Intrinsic::loongarch_lsx_vpcnt_h:
6305 case Intrinsic::loongarch_lsx_vpcnt_w:
6306 case Intrinsic::loongarch_lsx_vpcnt_d:
6307 case Intrinsic::loongarch_lasx_xvpcnt_b:
6308 case Intrinsic::loongarch_lasx_xvpcnt_h:
6309 case Intrinsic::loongarch_lasx_xvpcnt_w:
6310 case Intrinsic::loongarch_lasx_xvpcnt_d:
6311 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6312 case Intrinsic::loongarch_lsx_vbitclr_b:
6313 case Intrinsic::loongarch_lsx_vbitclr_h:
6314 case Intrinsic::loongarch_lsx_vbitclr_w:
6315 case Intrinsic::loongarch_lsx_vbitclr_d:
6316 case Intrinsic::loongarch_lasx_xvbitclr_b:
6317 case Intrinsic::loongarch_lasx_xvbitclr_h:
6318 case Intrinsic::loongarch_lasx_xvbitclr_w:
6319 case Intrinsic::loongarch_lasx_xvbitclr_d:
6320 return lowerVectorBitClear(N, DAG);
6321 case Intrinsic::loongarch_lsx_vbitclri_b:
6322 case Intrinsic::loongarch_lasx_xvbitclri_b:
6323 return lowerVectorBitClearImm<3>(N, DAG);
6324 case Intrinsic::loongarch_lsx_vbitclri_h:
6325 case Intrinsic::loongarch_lasx_xvbitclri_h:
6326 return lowerVectorBitClearImm<4>(N, DAG);
6327 case Intrinsic::loongarch_lsx_vbitclri_w:
6328 case Intrinsic::loongarch_lasx_xvbitclri_w:
6329 return lowerVectorBitClearImm<5>(N, DAG);
6330 case Intrinsic::loongarch_lsx_vbitclri_d:
6331 case Intrinsic::loongarch_lasx_xvbitclri_d:
6332 return lowerVectorBitClearImm<6>(N, DAG);
6333 case Intrinsic::loongarch_lsx_vbitset_b:
6334 case Intrinsic::loongarch_lsx_vbitset_h:
6335 case Intrinsic::loongarch_lsx_vbitset_w:
6336 case Intrinsic::loongarch_lsx_vbitset_d:
6337 case Intrinsic::loongarch_lasx_xvbitset_b:
6338 case Intrinsic::loongarch_lasx_xvbitset_h:
6339 case Intrinsic::loongarch_lasx_xvbitset_w:
6340 case Intrinsic::loongarch_lasx_xvbitset_d: {
6341 EVT VecTy = N->getValueType(0);
6342 SDValue One = DAG.getConstant(1, DL, VecTy);
6343 return DAG.getNode(
6344 ISD::OR, DL, VecTy, N->getOperand(1),
6345 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6346 }
6347 case Intrinsic::loongarch_lsx_vbitseti_b:
6348 case Intrinsic::loongarch_lasx_xvbitseti_b:
6349 return lowerVectorBitSetImm<3>(N, DAG);
6350 case Intrinsic::loongarch_lsx_vbitseti_h:
6351 case Intrinsic::loongarch_lasx_xvbitseti_h:
6352 return lowerVectorBitSetImm<4>(N, DAG);
6353 case Intrinsic::loongarch_lsx_vbitseti_w:
6354 case Intrinsic::loongarch_lasx_xvbitseti_w:
6355 return lowerVectorBitSetImm<5>(N, DAG);
6356 case Intrinsic::loongarch_lsx_vbitseti_d:
6357 case Intrinsic::loongarch_lasx_xvbitseti_d:
6358 return lowerVectorBitSetImm<6>(N, DAG);
6359 case Intrinsic::loongarch_lsx_vbitrev_b:
6360 case Intrinsic::loongarch_lsx_vbitrev_h:
6361 case Intrinsic::loongarch_lsx_vbitrev_w:
6362 case Intrinsic::loongarch_lsx_vbitrev_d:
6363 case Intrinsic::loongarch_lasx_xvbitrev_b:
6364 case Intrinsic::loongarch_lasx_xvbitrev_h:
6365 case Intrinsic::loongarch_lasx_xvbitrev_w:
6366 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6367 EVT VecTy = N->getValueType(0);
6368 SDValue One = DAG.getConstant(1, DL, VecTy);
6369 return DAG.getNode(
6370 ISD::XOR, DL, VecTy, N->getOperand(1),
6371 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6372 }
6373 case Intrinsic::loongarch_lsx_vbitrevi_b:
6374 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6375 return lowerVectorBitRevImm<3>(N, DAG);
6376 case Intrinsic::loongarch_lsx_vbitrevi_h:
6377 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6378 return lowerVectorBitRevImm<4>(N, DAG);
6379 case Intrinsic::loongarch_lsx_vbitrevi_w:
6380 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6381 return lowerVectorBitRevImm<5>(N, DAG);
6382 case Intrinsic::loongarch_lsx_vbitrevi_d:
6383 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6384 return lowerVectorBitRevImm<6>(N, DAG);
6385 case Intrinsic::loongarch_lsx_vfadd_s:
6386 case Intrinsic::loongarch_lsx_vfadd_d:
6387 case Intrinsic::loongarch_lasx_xvfadd_s:
6388 case Intrinsic::loongarch_lasx_xvfadd_d:
6389 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6390 N->getOperand(2));
6391 case Intrinsic::loongarch_lsx_vfsub_s:
6392 case Intrinsic::loongarch_lsx_vfsub_d:
6393 case Intrinsic::loongarch_lasx_xvfsub_s:
6394 case Intrinsic::loongarch_lasx_xvfsub_d:
6395 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6396 N->getOperand(2));
6397 case Intrinsic::loongarch_lsx_vfmul_s:
6398 case Intrinsic::loongarch_lsx_vfmul_d:
6399 case Intrinsic::loongarch_lasx_xvfmul_s:
6400 case Intrinsic::loongarch_lasx_xvfmul_d:
6401 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6402 N->getOperand(2));
6403 case Intrinsic::loongarch_lsx_vfdiv_s:
6404 case Intrinsic::loongarch_lsx_vfdiv_d:
6405 case Intrinsic::loongarch_lasx_xvfdiv_s:
6406 case Intrinsic::loongarch_lasx_xvfdiv_d:
6407 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6408 N->getOperand(2));
6409 case Intrinsic::loongarch_lsx_vfmadd_s:
6410 case Intrinsic::loongarch_lsx_vfmadd_d:
6411 case Intrinsic::loongarch_lasx_xvfmadd_s:
6412 case Intrinsic::loongarch_lasx_xvfmadd_d:
6413 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6414 N->getOperand(2), N->getOperand(3));
6415 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6416 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6417 N->getOperand(1), N->getOperand(2),
6418 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6419 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6420 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6421 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6422 N->getOperand(1), N->getOperand(2),
6423 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6424 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6425 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6426 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6427 N->getOperand(1), N->getOperand(2),
6428 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6429 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6430 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6431 N->getOperand(1), N->getOperand(2),
6432 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6433 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6434 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6435 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6436 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6437 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6438 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6439 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6440 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6441 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6442 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6443 N->getOperand(1)));
6444 case Intrinsic::loongarch_lsx_vreplve_b:
6445 case Intrinsic::loongarch_lsx_vreplve_h:
6446 case Intrinsic::loongarch_lsx_vreplve_w:
6447 case Intrinsic::loongarch_lsx_vreplve_d:
6448 case Intrinsic::loongarch_lasx_xvreplve_b:
6449 case Intrinsic::loongarch_lasx_xvreplve_h:
6450 case Intrinsic::loongarch_lasx_xvreplve_w:
6451 case Intrinsic::loongarch_lasx_xvreplve_d:
6452 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6453 N->getOperand(1),
6454 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6455 N->getOperand(2)));
6456 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6457 if (!Subtarget.is64Bit())
6459 break;
6460 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6461 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6462 if (!Subtarget.is64Bit())
6464 break;
6465 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6466 if (!Subtarget.is64Bit())
6468 break;
6469 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6470 if (!Subtarget.is64Bit())
6472 break;
6473 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6474 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6475 if (!Subtarget.is64Bit())
6477 break;
6478 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6479 if (!Subtarget.is64Bit())
6481 break;
6482 case Intrinsic::loongarch_lsx_bz_b:
6483 case Intrinsic::loongarch_lsx_bz_h:
6484 case Intrinsic::loongarch_lsx_bz_w:
6485 case Intrinsic::loongarch_lsx_bz_d:
6486 case Intrinsic::loongarch_lasx_xbz_b:
6487 case Intrinsic::loongarch_lasx_xbz_h:
6488 case Intrinsic::loongarch_lasx_xbz_w:
6489 case Intrinsic::loongarch_lasx_xbz_d:
6490 if (!Subtarget.is64Bit())
6491 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6492 N->getOperand(1));
6493 break;
6494 case Intrinsic::loongarch_lsx_bz_v:
6495 case Intrinsic::loongarch_lasx_xbz_v:
6496 if (!Subtarget.is64Bit())
6497 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6498 N->getOperand(1));
6499 break;
6500 case Intrinsic::loongarch_lsx_bnz_b:
6501 case Intrinsic::loongarch_lsx_bnz_h:
6502 case Intrinsic::loongarch_lsx_bnz_w:
6503 case Intrinsic::loongarch_lsx_bnz_d:
6504 case Intrinsic::loongarch_lasx_xbnz_b:
6505 case Intrinsic::loongarch_lasx_xbnz_h:
6506 case Intrinsic::loongarch_lasx_xbnz_w:
6507 case Intrinsic::loongarch_lasx_xbnz_d:
6508 if (!Subtarget.is64Bit())
6509 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6510 N->getOperand(1));
6511 break;
6512 case Intrinsic::loongarch_lsx_bnz_v:
6513 case Intrinsic::loongarch_lasx_xbnz_v:
6514 if (!Subtarget.is64Bit())
6515 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6516 N->getOperand(1));
6517 break;
6518 }
6519 return SDValue();
6520}
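// Note: the arithmetic LSX/LASX intrinsics handled above (vfadd, vfsub, vfmul,
// vfdiv, vfmadd, ...) are rewritten to the equivalent generic ISD nodes so the
// target-independent DAG combines and selection patterns apply to them as well.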
6521
6522static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6523 TargetLowering::DAGCombinerInfo &DCI,
6524 const LoongArchSubtarget &Subtarget) {
6525 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6526 // conversion is unnecessary and can be replaced with the
6527 // MOVFR2GR_S_LA64 operand.
6528 SDValue Op0 = N->getOperand(0);
6529 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6530 return Op0.getOperand(0);
6531 return SDValue();
6532}
6533
6534static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6535 TargetLowering::DAGCombinerInfo &DCI,
6536 const LoongArchSubtarget &Subtarget) {
6537 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6538 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6539 // operand.
6540 SDValue Op0 = N->getOperand(0);
6541 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6542 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6543 "Unexpected value type!");
6544 return Op0.getOperand(0);
6545 }
6546 return SDValue();
6547}
6548
6549static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6550 TargetLowering::DAGCombinerInfo &DCI,
6551 const LoongArchSubtarget &Subtarget) {
6552 MVT VT = N->getSimpleValueType(0);
6553 unsigned NumBits = VT.getScalarSizeInBits();
6554
6555 // Simplify the inputs.
6556 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6557 APInt DemandedMask(APInt::getAllOnes(NumBits));
6558 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6559 return SDValue(N, 0);
6560
6561 return SDValue();
6562}
6563
6564static SDValue
6565performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6566 TargetLowering::DAGCombinerInfo &DCI,
6567 const LoongArchSubtarget &Subtarget) {
6568 SDValue Op0 = N->getOperand(0);
6569 SDLoc DL(N);
6570
6571 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6572 // redundant. Instead, use BuildPairF64's operands directly.
6573 if (Op0.getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6574 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6575
6576 if (Op0->isUndef()) {
6577 SDValue Lo = DAG.getUNDEF(MVT::i32);
6578 SDValue Hi = DAG.getUNDEF(MVT::i32);
6579 return DCI.CombineTo(N, Lo, Hi);
6580 }
6581
6582 // It's cheaper to materialise two 32-bit integers than to load a double
6583 // from the constant pool and transfer it to integer registers through the
6584 // stack.
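// For example, the f64 constant 1.0 (bit pattern 0x3FF0000000000000) becomes
// Lo = 0x00000000 and Hi = 0x3FF00000.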
6585 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6586 APInt V = C->getValueAPF().bitcastToAPInt();
6587 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6588 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6589 return DCI.CombineTo(N, Lo, Hi);
6590 }
6591
6592 return SDValue();
6593}
6594
6595static SDValue
6596performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
6597 TargetLowering::DAGCombinerInfo &DCI,
6598 const LoongArchSubtarget &Subtarget) {
6599 if (!DCI.isBeforeLegalize())
6600 return SDValue();
6601
6602 MVT EltVT = N->getSimpleValueType(0);
6603 SDValue Vec = N->getOperand(0);
6604 EVT VecTy = Vec->getValueType(0);
6605 SDValue Idx = N->getOperand(1);
6606 unsigned IdxOp = Idx.getOpcode();
6607 SDLoc DL(N);
6608
6609 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6610 return SDValue();
6611
6612 // Combine:
6613 // t2 = truncate t1
6614 // t3 = {zero/sign/any}_extend t2
6615 // t4 = extract_vector_elt t0, t3
6616 // to:
6617 // t4 = extract_vector_elt t0, t1
6618 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6619 IdxOp == ISD::ANY_EXTEND) {
6620 SDValue IdxOrig = Idx.getOperand(0);
6621 if (!(IdxOrig.getOpcode() == ISD::TRUNCATE))
6622 return SDValue();
6623
6624 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6625 IdxOrig.getOperand(0));
6626 }
6627
6628 return SDValue();
6629}
6630
6631SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6632 DAGCombinerInfo &DCI) const {
6633 SelectionDAG &DAG = DCI.DAG;
6634 switch (N->getOpcode()) {
6635 default:
6636 break;
6637 case ISD::AND:
6638 return performANDCombine(N, DAG, DCI, Subtarget);
6639 case ISD::OR:
6640 return performORCombine(N, DAG, DCI, Subtarget);
6641 case ISD::SETCC:
6642 return performSETCCCombine(N, DAG, DCI, Subtarget);
6643 case ISD::SRL:
6644 return performSRLCombine(N, DAG, DCI, Subtarget);
6645 case ISD::BITCAST:
6646 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6647 case LoongArchISD::BITREV_W:
6648 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6649 case LoongArchISD::BR_CC:
6650 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6651 case LoongArchISD::SELECT_CC:
6652 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6653 case ISD::INTRINSIC_WO_CHAIN:
6654 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6655 case LoongArchISD::MOVGR2FR_W_LA64:
6656 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6657 case LoongArchISD::MOVFR2GR_S_LA64:
6658 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6659 case LoongArchISD::VMSKLTZ:
6660 case LoongArchISD::XVMSKLTZ:
6661 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6662 case LoongArchISD::SPLIT_PAIR_F64:
6663 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6664 case ISD::EXTRACT_VECTOR_ELT:
6665 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6666 }
6667 return SDValue();
6668}
6669
6670static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
6671 MachineBasicBlock *MBB) {
6672 if (!ZeroDivCheck)
6673 return MBB;
6674
6675 // Build instructions:
6676 // MBB:
6677 // div(or mod) $dst, $dividend, $divisor
6678 // bne $divisor, $zero, SinkMBB
6679 // BreakMBB:
6680 // break 7 // BRK_DIVZERO
6681 // SinkMBB:
6682 // fallthrough
6683 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6684 MachineFunction::iterator It = ++MBB->getIterator();
6685 MachineFunction *MF = MBB->getParent();
6686 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6687 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6688 MF->insert(It, BreakMBB);
6689 MF->insert(It, SinkMBB);
6690
6691 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6692 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6693 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6694
6695 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6696 DebugLoc DL = MI.getDebugLoc();
6697 MachineOperand &Divisor = MI.getOperand(2);
6698 Register DivisorReg = Divisor.getReg();
6699
6700 // MBB:
6701 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6702 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6703 .addReg(LoongArch::R0)
6704 .addMBB(SinkMBB);
6705 MBB->addSuccessor(BreakMBB);
6706 MBB->addSuccessor(SinkMBB);
6707
6708 // BreakMBB:
6709 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6710 // definition of BRK_DIVZERO.
6711 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6712 BreakMBB->addSuccessor(SinkMBB);
6713
6714 // Clear Divisor's kill flag.
6715 Divisor.setIsKill(false);
6716
6717 return SinkMBB;
6718}
6719
6720static MachineBasicBlock *
6721emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6722 const LoongArchSubtarget &Subtarget) {
6723 unsigned CondOpc;
6724 switch (MI.getOpcode()) {
6725 default:
6726 llvm_unreachable("Unexpected opcode");
6727 case LoongArch::PseudoVBZ:
6728 CondOpc = LoongArch::VSETEQZ_V;
6729 break;
6730 case LoongArch::PseudoVBZ_B:
6731 CondOpc = LoongArch::VSETANYEQZ_B;
6732 break;
6733 case LoongArch::PseudoVBZ_H:
6734 CondOpc = LoongArch::VSETANYEQZ_H;
6735 break;
6736 case LoongArch::PseudoVBZ_W:
6737 CondOpc = LoongArch::VSETANYEQZ_W;
6738 break;
6739 case LoongArch::PseudoVBZ_D:
6740 CondOpc = LoongArch::VSETANYEQZ_D;
6741 break;
6742 case LoongArch::PseudoVBNZ:
6743 CondOpc = LoongArch::VSETNEZ_V;
6744 break;
6745 case LoongArch::PseudoVBNZ_B:
6746 CondOpc = LoongArch::VSETALLNEZ_B;
6747 break;
6748 case LoongArch::PseudoVBNZ_H:
6749 CondOpc = LoongArch::VSETALLNEZ_H;
6750 break;
6751 case LoongArch::PseudoVBNZ_W:
6752 CondOpc = LoongArch::VSETALLNEZ_W;
6753 break;
6754 case LoongArch::PseudoVBNZ_D:
6755 CondOpc = LoongArch::VSETALLNEZ_D;
6756 break;
6757 case LoongArch::PseudoXVBZ:
6758 CondOpc = LoongArch::XVSETEQZ_V;
6759 break;
6760 case LoongArch::PseudoXVBZ_B:
6761 CondOpc = LoongArch::XVSETANYEQZ_B;
6762 break;
6763 case LoongArch::PseudoXVBZ_H:
6764 CondOpc = LoongArch::XVSETANYEQZ_H;
6765 break;
6766 case LoongArch::PseudoXVBZ_W:
6767 CondOpc = LoongArch::XVSETANYEQZ_W;
6768 break;
6769 case LoongArch::PseudoXVBZ_D:
6770 CondOpc = LoongArch::XVSETANYEQZ_D;
6771 break;
6772 case LoongArch::PseudoXVBNZ:
6773 CondOpc = LoongArch::XVSETNEZ_V;
6774 break;
6775 case LoongArch::PseudoXVBNZ_B:
6776 CondOpc = LoongArch::XVSETALLNEZ_B;
6777 break;
6778 case LoongArch::PseudoXVBNZ_H:
6779 CondOpc = LoongArch::XVSETALLNEZ_H;
6780 break;
6781 case LoongArch::PseudoXVBNZ_W:
6782 CondOpc = LoongArch::XVSETALLNEZ_W;
6783 break;
6784 case LoongArch::PseudoXVBNZ_D:
6785 CondOpc = LoongArch::XVSETALLNEZ_D;
6786 break;
6787 }
6788
6789 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6790 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6791 DebugLoc DL = MI.getDebugLoc();
6792 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6793 MachineFunction::iterator It = ++BB->getIterator();
6794
6795 MachineFunction *F = BB->getParent();
6796 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6797 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6798 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6799
6800 F->insert(It, FalseBB);
6801 F->insert(It, TrueBB);
6802 F->insert(It, SinkBB);
6803
6804 // Transfer the remainder of MBB and its successor edges to Sink.
6805 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6806 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6807
6808 // Insert the real instruction to BB.
6809 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6810 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6811
6812 // Insert branch.
6813 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6814 BB->addSuccessor(FalseBB);
6815 BB->addSuccessor(TrueBB);
6816
6817 // FalseBB.
6818 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6819 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6820 .addReg(LoongArch::R0)
6821 .addImm(0);
6822 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6823 FalseBB->addSuccessor(SinkBB);
6824
6825 // TrueBB.
6826 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6827 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6828 .addReg(LoongArch::R0)
6829 .addImm(1);
6830 TrueBB->addSuccessor(SinkBB);
6831
6832 // SinkBB: merge the results.
6833 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6834 MI.getOperand(0).getReg())
6835 .addReg(RD1)
6836 .addMBB(FalseBB)
6837 .addReg(RD2)
6838 .addMBB(TrueBB);
6839
6840 // The pseudo instruction is gone now.
6841 MI.eraseFromParent();
6842 return SinkBB;
6843}
6844
6845static MachineBasicBlock *
6846emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
6847 const LoongArchSubtarget &Subtarget) {
6848 unsigned InsOp;
6849 unsigned BroadcastOp;
6850 unsigned HalfSize;
6851 switch (MI.getOpcode()) {
6852 default:
6853 llvm_unreachable("Unexpected opcode");
6854 case LoongArch::PseudoXVINSGR2VR_B:
6855 HalfSize = 16;
6856 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6857 InsOp = LoongArch::XVEXTRINS_B;
6858 break;
6859 case LoongArch::PseudoXVINSGR2VR_H:
6860 HalfSize = 8;
6861 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6862 InsOp = LoongArch::XVEXTRINS_H;
6863 break;
6864 }
6865 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6866 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6867 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6868 DebugLoc DL = MI.getDebugLoc();
6869 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6870 // XDst = vector_insert XSrc, Elt, Idx
6871 Register XDst = MI.getOperand(0).getReg();
6872 Register XSrc = MI.getOperand(1).getReg();
6873 Register Elt = MI.getOperand(2).getReg();
6874 unsigned Idx = MI.getOperand(3).getImm();
6875
6876 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6877 Idx < HalfSize) {
6878 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6879 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6880
6881 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6882 .addReg(XSrc, 0, LoongArch::sub_128);
6883 BuildMI(*BB, MI, DL,
6884 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6885 : LoongArch::VINSGR2VR_B),
6886 ScratchSubReg2)
6887 .addReg(ScratchSubReg1)
6888 .addReg(Elt)
6889 .addImm(Idx);
6890
6891 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6892 .addImm(0)
6893 .addReg(ScratchSubReg2)
6894 .addImm(LoongArch::sub_128);
6895 } else {
6896 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6897 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6898
6899 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6900
6901 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6902 .addReg(ScratchReg1)
6903 .addReg(XSrc)
6904 .addImm(Idx >= HalfSize ? 48 : 18);
6905
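// The [X]VEXTRINS immediate encodes the destination element index in its high
// 4 bits and the source element index in its low 4 bits, so Idx * 17
// (Idx * 0x11) selects the same in-lane position for both fields.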
6906 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6907 .addReg(XSrc)
6908 .addReg(ScratchReg2)
6909 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6910 }
6911
6912 MI.eraseFromParent();
6913 return BB;
6914}
6915
6916static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
6917 MachineBasicBlock *BB,
6918 const LoongArchSubtarget &Subtarget) {
6919 assert(Subtarget.hasExtLSX());
6920 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6921 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6922 DebugLoc DL = MI.getDebugLoc();
6923 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6924 Register Dst = MI.getOperand(0).getReg();
6925 Register Src = MI.getOperand(1).getReg();
6926 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6927 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6928 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6929
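// Compute the scalar popcount through LSX: zero a vector register with VLDI,
// insert the GPR value into element 0, run VPCNT at the native element width,
// and move element 0 of the result back to the destination GPR.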
6930 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6931 BuildMI(*BB, MI, DL,
6932 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6933 : LoongArch::VINSGR2VR_W),
6934 ScratchReg2)
6935 .addReg(ScratchReg1)
6936 .addReg(Src)
6937 .addImm(0);
6938 BuildMI(
6939 *BB, MI, DL,
6940 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6941 ScratchReg3)
6942 .addReg(ScratchReg2);
6943 BuildMI(*BB, MI, DL,
6944 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6945 : LoongArch::VPICKVE2GR_W),
6946 Dst)
6947 .addReg(ScratchReg3)
6948 .addImm(0);
6949
6950 MI.eraseFromParent();
6951 return BB;
6952}
6953
6954static MachineBasicBlock *
6955emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6956 const LoongArchSubtarget &Subtarget) {
6957 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6958 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6959 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6960 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6961 Register Dst = MI.getOperand(0).getReg();
6962 Register Src = MI.getOperand(1).getReg();
6963 DebugLoc DL = MI.getDebugLoc();
6964 unsigned EleBits = 8;
6965 unsigned NotOpc = 0;
6966 unsigned MskOpc;
6967
6968 switch (MI.getOpcode()) {
6969 default:
6970 llvm_unreachable("Unexpected opcode");
6971 case LoongArch::PseudoVMSKLTZ_B:
6972 MskOpc = LoongArch::VMSKLTZ_B;
6973 break;
6974 case LoongArch::PseudoVMSKLTZ_H:
6975 MskOpc = LoongArch::VMSKLTZ_H;
6976 EleBits = 16;
6977 break;
6978 case LoongArch::PseudoVMSKLTZ_W:
6979 MskOpc = LoongArch::VMSKLTZ_W;
6980 EleBits = 32;
6981 break;
6982 case LoongArch::PseudoVMSKLTZ_D:
6983 MskOpc = LoongArch::VMSKLTZ_D;
6984 EleBits = 64;
6985 break;
6986 case LoongArch::PseudoVMSKGEZ_B:
6987 MskOpc = LoongArch::VMSKGEZ_B;
6988 break;
6989 case LoongArch::PseudoVMSKEQZ_B:
6990 MskOpc = LoongArch::VMSKNZ_B;
6991 NotOpc = LoongArch::VNOR_V;
6992 break;
6993 case LoongArch::PseudoVMSKNEZ_B:
6994 MskOpc = LoongArch::VMSKNZ_B;
6995 break;
6996 case LoongArch::PseudoXVMSKLTZ_B:
6997 MskOpc = LoongArch::XVMSKLTZ_B;
6998 RC = &LoongArch::LASX256RegClass;
6999 break;
7000 case LoongArch::PseudoXVMSKLTZ_H:
7001 MskOpc = LoongArch::XVMSKLTZ_H;
7002 RC = &LoongArch::LASX256RegClass;
7003 EleBits = 16;
7004 break;
7005 case LoongArch::PseudoXVMSKLTZ_W:
7006 MskOpc = LoongArch::XVMSKLTZ_W;
7007 RC = &LoongArch::LASX256RegClass;
7008 EleBits = 32;
7009 break;
7010 case LoongArch::PseudoXVMSKLTZ_D:
7011 MskOpc = LoongArch::XVMSKLTZ_D;
7012 RC = &LoongArch::LASX256RegClass;
7013 EleBits = 64;
7014 break;
7015 case LoongArch::PseudoXVMSKGEZ_B:
7016 MskOpc = LoongArch::XVMSKGEZ_B;
7017 RC = &LoongArch::LASX256RegClass;
7018 break;
7019 case LoongArch::PseudoXVMSKEQZ_B:
7020 MskOpc = LoongArch::XVMSKNZ_B;
7021 NotOpc = LoongArch::XVNOR_V;
7022 RC = &LoongArch::LASX256RegClass;
7023 break;
7024 case LoongArch::PseudoXVMSKNEZ_B:
7025 MskOpc = LoongArch::XVMSKNZ_B;
7026 RC = &LoongArch::LASX256RegClass;
7027 break;
7028 }
7029
7030 Register Msk = MRI.createVirtualRegister(RC);
7031 if (NotOpc) {
7032 Register Tmp = MRI.createVirtualRegister(RC);
7033 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7034 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7035 .addReg(Tmp, RegState::Kill)
7036 .addReg(Tmp, RegState::Kill);
7037 } else {
7038 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7039 }
7040
7041 if (TRI->getRegSizeInBits(*RC) > 128) {
7042 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7043 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7044 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7045 .addReg(Msk)
7046 .addImm(0);
7047 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7048 .addReg(Msk, RegState::Kill)
7049 .addImm(4);
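// Combine the per-lane masks: the low 128/EleBits result bits come from the
// first 128-bit lane (Lo) and the bits starting at position 128/EleBits come
// from the second lane (Hi), merged with a single BSTRINS.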
7050 BuildMI(*BB, MI, DL,
7051 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7052 : LoongArch::BSTRINS_W),
7053 Dst)
7054 .addReg(Lo, RegState::Kill)
7055 .addReg(Hi, RegState::Kill)
7056 .addImm(256 / EleBits - 1)
7057 .addImm(128 / EleBits);
7058 } else {
7059 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7060 .addReg(Msk, RegState::Kill)
7061 .addImm(0);
7062 }
7063
7064 MI.eraseFromParent();
7065 return BB;
7066}
7067
7068static MachineBasicBlock *
7069emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7070 const LoongArchSubtarget &Subtarget) {
7071 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7072 "Unexpected instruction");
7073
7074 MachineFunction &MF = *BB->getParent();
7075 DebugLoc DL = MI.getDebugLoc();
7076 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7077 Register LoReg = MI.getOperand(0).getReg();
7078 Register HiReg = MI.getOperand(1).getReg();
7079 Register SrcReg = MI.getOperand(2).getReg();
7080
7081 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7082 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7083 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7084 MI.eraseFromParent(); // The pseudo instruction is gone now.
7085 return BB;
7086}
7087
7088static MachineBasicBlock *
7089emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7090 const LoongArchSubtarget &Subtarget) {
7091 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7092 "Unexpected instruction");
7093
7094 MachineFunction &MF = *BB->getParent();
7095 DebugLoc DL = MI.getDebugLoc();
7096 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7097 MachineRegisterInfo &MRI = MF.getRegInfo();
7098 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7099 Register DstReg = MI.getOperand(0).getReg();
7100 Register LoReg = MI.getOperand(1).getReg();
7101 Register HiReg = MI.getOperand(2).getReg();
7102
7103 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7104 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7105 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7106 .addReg(TmpReg, RegState::Kill)
7107 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7108 MI.eraseFromParent(); // The pseudo instruction is gone now.
7109 return BB;
7110}
7111
7112static bool isSelectPseudo(MachineInstr &MI) {
7113 switch (MI.getOpcode()) {
7114 default:
7115 return false;
7116 case LoongArch::Select_GPR_Using_CC_GPR:
7117 return true;
7118 }
7119}
7120
7121static MachineBasicBlock *
7122emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
7123 const LoongArchSubtarget &Subtarget) {
7124 // To "insert" Select_* instructions, we actually have to insert the triangle
7125 // control-flow pattern. The incoming instructions know the destination vreg
7126 // to set, the condition code register to branch on, the true/false values to
7127 // select between, and the condcode to use to select the appropriate branch.
7128 //
7129 // We produce the following control flow:
7130 // HeadMBB
7131 // | \
7132 // | IfFalseMBB
7133 // | /
7134 // TailMBB
7135 //
7136 // When we find a sequence of selects we attempt to optimize their emission
7137 // by sharing the control flow. Currently we only handle cases where we have
7138 // multiple selects with the exact same condition (same LHS, RHS and CC).
7139 // The selects may be interleaved with other instructions if the other
7140 // instructions meet some requirements we deem safe:
7141 // - They are not pseudo instructions.
7142 // - They are debug instructions. Otherwise,
7143 // - They do not have side-effects, do not access memory and their inputs do
7144 // not depend on the results of the select pseudo-instructions.
7145 // The TrueV/FalseV operands of the selects cannot depend on the result of
7146 // previous selects in the sequence.
7147 // These conditions could be further relaxed. See the X86 target for a
7148 // related approach and more information.
7149
7150 Register LHS = MI.getOperand(1).getReg();
7151 Register RHS;
7152 if (MI.getOperand(2).isReg())
7153 RHS = MI.getOperand(2).getReg();
7154 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7155
7156 SmallVector<MachineInstr *, 4> SelectDebugValues;
7157 SmallSet<Register, 4> SelectDests;
7158 SelectDests.insert(MI.getOperand(0).getReg());
7159
7160 MachineInstr *LastSelectPseudo = &MI;
7161 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7162 SequenceMBBI != E; ++SequenceMBBI) {
7163 if (SequenceMBBI->isDebugInstr())
7164 continue;
7165 if (isSelectPseudo(*SequenceMBBI)) {
7166 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7167 !SequenceMBBI->getOperand(2).isReg() ||
7168 SequenceMBBI->getOperand(2).getReg() != RHS ||
7169 SequenceMBBI->getOperand(3).getImm() != CC ||
7170 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7171 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7172 break;
7173 LastSelectPseudo = &*SequenceMBBI;
7174 SequenceMBBI->collectDebugValues(SelectDebugValues);
7175 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7176 continue;
7177 }
7178 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7179 SequenceMBBI->mayLoadOrStore() ||
7180 SequenceMBBI->usesCustomInsertionHook())
7181 break;
7182 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7183 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7184 }))
7185 break;
7186 }
7187
7188 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7189 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7190 DebugLoc DL = MI.getDebugLoc();
7191 MachineFunction::iterator I = ++BB->getIterator();
7192
7193 MachineBasicBlock *HeadMBB = BB;
7194 MachineFunction *F = BB->getParent();
7195 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7196 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7197
7198 F->insert(I, IfFalseMBB);
7199 F->insert(I, TailMBB);
7200
7201 // Set the call frame size on entry to the new basic blocks.
7202 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7203 IfFalseMBB->setCallFrameSize(CallFrameSize);
7204 TailMBB->setCallFrameSize(CallFrameSize);
7205
7206 // Transfer debug instructions associated with the selects to TailMBB.
7207 for (MachineInstr *DebugInstr : SelectDebugValues) {
7208 TailMBB->push_back(DebugInstr->removeFromParent());
7209 }
7210
7211 // Move all instructions after the sequence to TailMBB.
7212 TailMBB->splice(TailMBB->end(), HeadMBB,
7213 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7214 // Update machine-CFG edges by transferring all successors of the current
7215 // block to the new block which will contain the Phi nodes for the selects.
7216 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7217 // Set the successors for HeadMBB.
7218 HeadMBB->addSuccessor(IfFalseMBB);
7219 HeadMBB->addSuccessor(TailMBB);
7220
7221 // Insert appropriate branch.
7222 if (MI.getOperand(2).isImm())
7223 BuildMI(HeadMBB, DL, TII.get(CC))
7224 .addReg(LHS)
7225 .addImm(MI.getOperand(2).getImm())
7226 .addMBB(TailMBB);
7227 else
7228 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7229
7230 // IfFalseMBB just falls through to TailMBB.
7231 IfFalseMBB->addSuccessor(TailMBB);
7232
7233 // Create PHIs for all of the select pseudo-instructions.
7234 auto SelectMBBI = MI.getIterator();
7235 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7236 auto InsertionPoint = TailMBB->begin();
7237 while (SelectMBBI != SelectEnd) {
7238 auto Next = std::next(SelectMBBI);
7239 if (isSelectPseudo(*SelectMBBI)) {
7240 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7241 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7242 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7243 .addReg(SelectMBBI->getOperand(4).getReg())
7244 .addMBB(HeadMBB)
7245 .addReg(SelectMBBI->getOperand(5).getReg())
7246 .addMBB(IfFalseMBB);
7247 SelectMBBI->eraseFromParent();
7248 }
7249 SelectMBBI = Next;
7250 }
7251
7252 F->getProperties().resetNoPHIs();
7253 return TailMBB;
7254}
7255
7256MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7257 MachineInstr &MI, MachineBasicBlock *BB) const {
7258 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7259 DebugLoc DL = MI.getDebugLoc();
7260
7261 switch (MI.getOpcode()) {
7262 default:
7263 llvm_unreachable("Unexpected instr type to insert");
7264 case LoongArch::DIV_W:
7265 case LoongArch::DIV_WU:
7266 case LoongArch::MOD_W:
7267 case LoongArch::MOD_WU:
7268 case LoongArch::DIV_D:
7269 case LoongArch::DIV_DU:
7270 case LoongArch::MOD_D:
7271 case LoongArch::MOD_DU:
7272 return insertDivByZeroTrap(MI, BB);
7273 break;
7274 case LoongArch::WRFCSR: {
7275 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7276 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7277 .addReg(MI.getOperand(1).getReg());
7278 MI.eraseFromParent();
7279 return BB;
7280 }
7281 case LoongArch::RDFCSR: {
7282 MachineInstr *ReadFCSR =
7283 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7284 MI.getOperand(0).getReg())
7285 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7286 ReadFCSR->getOperand(1).setIsUndef();
7287 MI.eraseFromParent();
7288 return BB;
7289 }
7290 case LoongArch::Select_GPR_Using_CC_GPR:
7291 return emitSelectPseudo(MI, BB, Subtarget);
7292 case LoongArch::BuildPairF64Pseudo:
7293 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7294 case LoongArch::SplitPairF64Pseudo:
7295 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7296 case LoongArch::PseudoVBZ:
7297 case LoongArch::PseudoVBZ_B:
7298 case LoongArch::PseudoVBZ_H:
7299 case LoongArch::PseudoVBZ_W:
7300 case LoongArch::PseudoVBZ_D:
7301 case LoongArch::PseudoVBNZ:
7302 case LoongArch::PseudoVBNZ_B:
7303 case LoongArch::PseudoVBNZ_H:
7304 case LoongArch::PseudoVBNZ_W:
7305 case LoongArch::PseudoVBNZ_D:
7306 case LoongArch::PseudoXVBZ:
7307 case LoongArch::PseudoXVBZ_B:
7308 case LoongArch::PseudoXVBZ_H:
7309 case LoongArch::PseudoXVBZ_W:
7310 case LoongArch::PseudoXVBZ_D:
7311 case LoongArch::PseudoXVBNZ:
7312 case LoongArch::PseudoXVBNZ_B:
7313 case LoongArch::PseudoXVBNZ_H:
7314 case LoongArch::PseudoXVBNZ_W:
7315 case LoongArch::PseudoXVBNZ_D:
7316 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7317 case LoongArch::PseudoXVINSGR2VR_B:
7318 case LoongArch::PseudoXVINSGR2VR_H:
7319 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7320 case LoongArch::PseudoCTPOP:
7321 return emitPseudoCTPOP(MI, BB, Subtarget);
7322 case LoongArch::PseudoVMSKLTZ_B:
7323 case LoongArch::PseudoVMSKLTZ_H:
7324 case LoongArch::PseudoVMSKLTZ_W:
7325 case LoongArch::PseudoVMSKLTZ_D:
7326 case LoongArch::PseudoVMSKGEZ_B:
7327 case LoongArch::PseudoVMSKEQZ_B:
7328 case LoongArch::PseudoVMSKNEZ_B:
7329 case LoongArch::PseudoXVMSKLTZ_B:
7330 case LoongArch::PseudoXVMSKLTZ_H:
7331 case LoongArch::PseudoXVMSKLTZ_W:
7332 case LoongArch::PseudoXVMSKLTZ_D:
7333 case LoongArch::PseudoXVMSKGEZ_B:
7334 case LoongArch::PseudoXVMSKEQZ_B:
7335 case LoongArch::PseudoXVMSKNEZ_B:
7336 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7337 case TargetOpcode::STATEPOINT:
7338 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7339 // while bl call instruction (where statepoint will be lowered at the
7340 // end) has implicit def. This def is early-clobber as it will be set at
7341 // the moment of the call and earlier than any use is read.
7342 // Add this implicit dead def here as a workaround.
7343 MI.addOperand(*MI.getMF(),
7344 MachineOperand::CreateReg(
7345 LoongArch::R1, /*isDef*/ true,
7346 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7347 /*isUndef*/ false, /*isEarlyClobber*/ true));
7348 if (!Subtarget.is64Bit())
7349 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7350 return emitPatchPoint(MI, BB);
7351 }
7352}
7353
7355 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7356 unsigned *Fast) const {
7357 if (!Subtarget.hasUAL())
7358 return false;
7359
7360 // TODO: set reasonable speed number.
7361 if (Fast)
7362 *Fast = 1;
7363 return true;
7364}
7365
7366const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7367 switch ((LoongArchISD::NodeType)Opcode) {
7368 case LoongArchISD::FIRST_NUMBER:
7369 break;
7370
7371#define NODE_NAME_CASE(node) \
7372 case LoongArchISD::node: \
7373 return "LoongArchISD::" #node;
7374
7375 // TODO: Add more target-dependent nodes later.
7376 NODE_NAME_CASE(CALL)
7377 NODE_NAME_CASE(CALL_MEDIUM)
7378 NODE_NAME_CASE(CALL_LARGE)
7379 NODE_NAME_CASE(RET)
7380 NODE_NAME_CASE(TAIL)
7381 NODE_NAME_CASE(TAIL_MEDIUM)
7382 NODE_NAME_CASE(TAIL_LARGE)
7383 NODE_NAME_CASE(SELECT_CC)
7384 NODE_NAME_CASE(BR_CC)
7385 NODE_NAME_CASE(BRCOND)
7386 NODE_NAME_CASE(SLL_W)
7387 NODE_NAME_CASE(SRA_W)
7388 NODE_NAME_CASE(SRL_W)
7389 NODE_NAME_CASE(BSTRINS)
7390 NODE_NAME_CASE(BSTRPICK)
7391 NODE_NAME_CASE(MOVGR2FR_W)
7392 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7393 NODE_NAME_CASE(MOVGR2FR_D)
7394 NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
7395 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7396 NODE_NAME_CASE(FTINT)
7397 NODE_NAME_CASE(BUILD_PAIR_F64)
7398 NODE_NAME_CASE(SPLIT_PAIR_F64)
7399 NODE_NAME_CASE(REVB_2H)
7400 NODE_NAME_CASE(REVB_2W)
7401 NODE_NAME_CASE(BITREV_4B)
7402 NODE_NAME_CASE(BITREV_8B)
7403 NODE_NAME_CASE(BITREV_W)
7404 NODE_NAME_CASE(ROTR_W)
7405 NODE_NAME_CASE(ROTL_W)
7406 NODE_NAME_CASE(DIV_W)
7407 NODE_NAME_CASE(DIV_WU)
7408 NODE_NAME_CASE(MOD_W)
7409 NODE_NAME_CASE(MOD_WU)
7410 NODE_NAME_CASE(CLZ_W)
7411 NODE_NAME_CASE(CTZ_W)
7412 NODE_NAME_CASE(DBAR)
7413 NODE_NAME_CASE(IBAR)
7414 NODE_NAME_CASE(BREAK)
7415 NODE_NAME_CASE(SYSCALL)
7416 NODE_NAME_CASE(CRC_W_B_W)
7417 NODE_NAME_CASE(CRC_W_H_W)
7418 NODE_NAME_CASE(CRC_W_W_W)
7419 NODE_NAME_CASE(CRC_W_D_W)
7420 NODE_NAME_CASE(CRCC_W_B_W)
7421 NODE_NAME_CASE(CRCC_W_H_W)
7422 NODE_NAME_CASE(CRCC_W_W_W)
7423 NODE_NAME_CASE(CRCC_W_D_W)
7424 NODE_NAME_CASE(CSRRD)
7425 NODE_NAME_CASE(CSRWR)
7426 NODE_NAME_CASE(CSRXCHG)
7427 NODE_NAME_CASE(IOCSRRD_B)
7428 NODE_NAME_CASE(IOCSRRD_H)
7429 NODE_NAME_CASE(IOCSRRD_W)
7430 NODE_NAME_CASE(IOCSRRD_D)
7431 NODE_NAME_CASE(IOCSRWR_B)
7432 NODE_NAME_CASE(IOCSRWR_H)
7433 NODE_NAME_CASE(IOCSRWR_W)
7434 NODE_NAME_CASE(IOCSRWR_D)
7435 NODE_NAME_CASE(CPUCFG)
7436 NODE_NAME_CASE(MOVGR2FCSR)
7437 NODE_NAME_CASE(MOVFCSR2GR)
7438 NODE_NAME_CASE(CACOP_D)
7439 NODE_NAME_CASE(CACOP_W)
7440 NODE_NAME_CASE(VSHUF)
7441 NODE_NAME_CASE(VPICKEV)
7442 NODE_NAME_CASE(VPICKOD)
7443 NODE_NAME_CASE(VPACKEV)
7444 NODE_NAME_CASE(VPACKOD)
7445 NODE_NAME_CASE(VILVL)
7446 NODE_NAME_CASE(VILVH)
7447 NODE_NAME_CASE(VSHUF4I)
7448 NODE_NAME_CASE(VREPLVEI)
7449 NODE_NAME_CASE(VREPLGR2VR)
7450 NODE_NAME_CASE(XVPERMI)
7451 NODE_NAME_CASE(XVPERM)
7452 NODE_NAME_CASE(XVREPLVE0)
7453 NODE_NAME_CASE(XVREPLVE0Q)
7454 NODE_NAME_CASE(VPICK_SEXT_ELT)
7455 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7456 NODE_NAME_CASE(VREPLVE)
7457 NODE_NAME_CASE(VALL_ZERO)
7458 NODE_NAME_CASE(VANY_ZERO)
7459 NODE_NAME_CASE(VALL_NONZERO)
7460 NODE_NAME_CASE(VANY_NONZERO)
7461 NODE_NAME_CASE(FRECIPE)
7462 NODE_NAME_CASE(FRSQRTE)
7463 NODE_NAME_CASE(VSLLI)
7464 NODE_NAME_CASE(VSRLI)
7465 NODE_NAME_CASE(VBSLL)
7466 NODE_NAME_CASE(VBSRL)
7467 NODE_NAME_CASE(VLDREPL)
7468 NODE_NAME_CASE(VMSKLTZ)
7469 NODE_NAME_CASE(VMSKGEZ)
7470 NODE_NAME_CASE(VMSKEQZ)
7471 NODE_NAME_CASE(VMSKNEZ)
7472 NODE_NAME_CASE(XVMSKLTZ)
7473 NODE_NAME_CASE(XVMSKGEZ)
7474 NODE_NAME_CASE(XVMSKEQZ)
7475 NODE_NAME_CASE(XVMSKNEZ)
7476 NODE_NAME_CASE(VHADDW)
7477 }
7478#undef NODE_NAME_CASE
7479 return nullptr;
7480}
7481
7482//===----------------------------------------------------------------------===//
7483// Calling Convention Implementation
7484//===----------------------------------------------------------------------===//
7485
7486// Eight general-purpose registers a0-a7 are used for passing integer arguments,
7487// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7488// fixed-point arguments, and floating-point arguments when no FPR is available
7489// or with soft float ABI.
7490const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7491 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7492 LoongArch::R10, LoongArch::R11};
7493// Eight floating-point registers fa0-fa7 are used for passing floating-point
7494// arguments, and fa0-fa1 are also used to return values.
7495const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7496 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7497 LoongArch::F6, LoongArch::F7};
7498// FPR32 and FPR64 alias each other.
7499const MCPhysReg ArgFPR64s[] = {
7500 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7501 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7502
7503const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7504 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7505 LoongArch::VR6, LoongArch::VR7};
7506
7507const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7508 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7509 LoongArch::XR6, LoongArch::XR7};
7510
7511// Pass a 2*GRLen argument that has been split into two GRLen values through
7512// registers or the stack as necessary.
7513static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7514 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7515 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7516 ISD::ArgFlagsTy ArgFlags2) {
7517 unsigned GRLenInBytes = GRLen / 8;
7518 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7519 // At least one half can be passed via register.
7520 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7521 VA1.getLocVT(), CCValAssign::Full));
7522 } else {
7523 // Both halves must be passed on the stack, with proper alignment.
7524 Align StackAlign =
7525 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7526 State.addLoc(
7527 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
7528 State.AllocateStack(GRLenInBytes, StackAlign),
7529 VA1.getLocVT(), CCValAssign::Full));
7530 State.addLoc(CCValAssign::getMem(
7531 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7532 LocVT2, CCValAssign::Full));
7533 return false;
7534 }
7535 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7536 // The second half can also be passed via register.
7537 State.addLoc(
7538 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7539 } else {
7540 // The second half is passed via the stack, without additional alignment.
7541 State.addLoc(CCValAssign::getMem(
7542 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7543 LocVT2, CCValAssign::Full));
7544 }
7545 return false;
7546}
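// For example, on LA32 (GRLen = 32) an i64 argument that was split into two
// i32 halves is passed in a register pair when two GPRs remain, in one GPR
// plus a stack slot when only one remains, and entirely on the stack (with
// the alignment of the original type) when none remain.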
7547
7548// Implements the LoongArch calling convention. Returns true upon failure.
7549static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
7550 unsigned ValNo, MVT ValVT,
7551 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7552 CCState &State, bool IsRet, Type *OrigTy) {
7553 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7554 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7555 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7556 MVT LocVT = ValVT;
7557
7558 // Any return value split into more than two values can't be returned
7559 // directly.
7560 if (IsRet && ValNo > 1)
7561 return true;
7562
7563 // Use GPRs for floating-point values when passing a variadic argument or when no FPR is available.
7564 bool UseGPRForFloat = true;
7565
7566 switch (ABI) {
7567 default:
7568 llvm_unreachable("Unexpected ABI");
7569 break;
7570 case LoongArchABI::ABI_ILP32F:
7571 case LoongArchABI::ABI_ILP32D:
7572 case LoongArchABI::ABI_LP64F:
7573 case LoongArchABI::ABI_LP64D:
7574 UseGPRForFloat = ArgFlags.isVarArg();
7575 break;
7576 case LoongArchABI::ABI_ILP32S:
7577 case LoongArchABI::ABI_LP64S:
7578 break;
7579 }
7580
7581 // If this is a variadic argument, the LoongArch calling convention requires
7582 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7583 // byte alignment. An aligned register should be used regardless of whether
7584 // the original argument was split during legalisation or not. The argument
7585 // will not be passed by registers if the original type is larger than
7586 // 2*GRLen, so the register alignment rule does not apply.
7587 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7588 if (ArgFlags.isVarArg() &&
7589 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7590 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7591 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7592 // Skip 'odd' register if necessary.
7593 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7594 State.AllocateReg(ArgGPRs);
7595 }
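// For example, a variadic i64 on LA32 whose first half would otherwise land in
// an odd-numbered argument register (a1, a3, a5 or a7) skips that register so
// the value is passed in an aligned even/odd pair such as a2/a3.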
7596
7597 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7598 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7599 State.getPendingArgFlags();
7600
7601 assert(PendingLocs.size() == PendingArgFlags.size() &&
7602 "PendingLocs and PendingArgFlags out of sync");
7603
7604 // FPR32 and FPR64 alias each other.
7605 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7606 UseGPRForFloat = true;
7607
7608 if (UseGPRForFloat && ValVT == MVT::f32) {
7609 LocVT = GRLenVT;
7610 LocInfo = CCValAssign::BCvt;
7611 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7612 LocVT = MVT::i64;
7613 LocInfo = CCValAssign::BCvt;
7614 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7615 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7616 // registers are exhausted.
7617 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7618 // Depending on available argument GPRS, f64 may be passed in a pair of
7619 // GPRs, split between a GPR and the stack, or passed completely on the
7620 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7621 // cases.
7622 MCRegister Reg = State.AllocateReg(ArgGPRs);
7623 if (!Reg) {
7624 int64_t StackOffset = State.AllocateStack(8, Align(8));
7625 State.addLoc(
7626 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7627 return false;
7628 }
7629 LocVT = MVT::i32;
7630 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7631 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7632 if (HiReg) {
7633 State.addLoc(
7634 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7635 } else {
7636 int64_t StackOffset = State.AllocateStack(4, Align(4));
7637 State.addLoc(
7638 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7639 }
7640 return false;
7641 }
7642
7643 // Split arguments might be passed indirectly, so keep track of the pending
7644 // values.
7645 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7646 LocVT = GRLenVT;
7647 LocInfo = CCValAssign::Indirect;
7648 PendingLocs.push_back(
7649 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7650 PendingArgFlags.push_back(ArgFlags);
7651 if (!ArgFlags.isSplitEnd()) {
7652 return false;
7653 }
7654 }
7655
7656 // If the split argument only had two elements, it should be passed directly
7657 // in registers or on the stack.
7658 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7659 PendingLocs.size() <= 2) {
7660 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7661 // Apply the normal calling convention rules to the first half of the
7662 // split argument.
7663 CCValAssign VA = PendingLocs[0];
7664 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7665 PendingLocs.clear();
7666 PendingArgFlags.clear();
7667 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7668 ArgFlags);
7669 }
7670
7671 // Allocate to a register if possible, or else a stack slot.
7672 Register Reg;
7673 unsigned StoreSizeBytes = GRLen / 8;
7674 Align StackAlign = Align(GRLen / 8);
7675
7676 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7677 Reg = State.AllocateReg(ArgFPR32s);
7678 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7679 Reg = State.AllocateReg(ArgFPR64s);
7680 } else if (ValVT.is128BitVector()) {
7681 Reg = State.AllocateReg(ArgVRs);
7682 UseGPRForFloat = false;
7683 StoreSizeBytes = 16;
7684 StackAlign = Align(16);
7685 } else if (ValVT.is256BitVector()) {
7686 Reg = State.AllocateReg(ArgXRs);
7687 UseGPRForFloat = false;
7688 StoreSizeBytes = 32;
7689 StackAlign = Align(32);
7690 } else {
7691 Reg = State.AllocateReg(ArgGPRs);
7692 }
7693
7694 unsigned StackOffset =
7695 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7696
7697 // If we reach this point and PendingLocs is non-empty, we must be at the
7698 // end of a split argument that must be passed indirectly.
7699 if (!PendingLocs.empty()) {
7700 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7701 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7702 for (auto &It : PendingLocs) {
7703 if (Reg)
7704 It.convertToReg(Reg);
7705 else
7706 It.convertToMem(StackOffset);
7707 State.addLoc(It);
7708 }
7709 PendingLocs.clear();
7710 PendingArgFlags.clear();
7711 return false;
7712 }
7713 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7714 "Expected an GRLenVT at this stage");
7715
7716 if (Reg) {
7717 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7718 return false;
7719 }
7720
7721 // When a floating-point value is passed on the stack, no bit-cast is needed.
7722 if (ValVT.isFloatingPoint()) {
7723 LocVT = ValVT;
7724 LocInfo = CCValAssign::Full;
7725 }
7726
7727 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7728 return false;
7729}
7730
7731void LoongArchTargetLowering::analyzeInputArgs(
7732 MachineFunction &MF, CCState &CCInfo,
7733 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7734 LoongArchCCAssignFn Fn) const {
7735 FunctionType *FType = MF.getFunction().getFunctionType();
7736 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7737 MVT ArgVT = Ins[i].VT;
7738 Type *ArgTy = nullptr;
7739 if (IsRet)
7740 ArgTy = FType->getReturnType();
7741 else if (Ins[i].isOrigArg())
7742 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7743 LoongArchABI::ABI ABI =
7744 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7745 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7746 CCInfo, IsRet, ArgTy)) {
7747 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7748 << '\n');
7749 llvm_unreachable("");
7750 }
7751 }
7752}
7753
7754void LoongArchTargetLowering::analyzeOutputArgs(
7755 MachineFunction &MF, CCState &CCInfo,
7756 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7757 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7758 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7759 MVT ArgVT = Outs[i].VT;
7760 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7761 LoongArchABI::ABI ABI =
7762 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7763 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7764 CCInfo, IsRet, OrigTy)) {
7765 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7766 << "\n");
7767 llvm_unreachable("");
7768 }
7769 }
7770}
7771
7772// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7773// values.
7774static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
7775 const CCValAssign &VA, const SDLoc &DL) {
7776 switch (VA.getLocInfo()) {
7777 default:
7778 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7779 case CCValAssign::Full:
7781 break;
7782 case CCValAssign::BCvt:
7783 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7784 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7785 else
7786 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7787 break;
7788 }
7789 return Val;
7790}
7791
7792static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
7793 const CCValAssign &VA, const SDLoc &DL,
7794 const ISD::InputArg &In,
7795 const LoongArchTargetLowering &TLI) {
7796 MachineFunction &MF = DAG.getMachineFunction();
7797 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7798 EVT LocVT = VA.getLocVT();
7799 SDValue Val;
7800 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7801 Register VReg = RegInfo.createVirtualRegister(RC);
7802 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7803 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7804
7805 // If input is sign extended from 32 bits, note it for the OptW pass.
7806 if (In.isOrigArg()) {
7807 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7808 if (OrigArg->getType()->isIntegerTy()) {
7809 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7810 // An input zero extended from i31 can also be considered sign extended.
7811 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7812 (BitWidth < 32 && In.Flags.isZExt())) {
7813 LoongArchMachineFunctionInfo *LAFI =
7814 MF.getInfo<LoongArchMachineFunctionInfo>();
7815 LAFI->addSExt32Register(VReg);
7816 }
7817 }
7818 }
7819
7820 return convertLocVTToValVT(DAG, Val, VA, DL);
7821}
7822
7823// The caller is responsible for loading the full value if the argument is
7824// passed with CCValAssign::Indirect.
7825static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
7826 const CCValAssign &VA, const SDLoc &DL) {
7827 MachineFunction &MF = DAG.getMachineFunction();
7828 MachineFrameInfo &MFI = MF.getFrameInfo();
7829 EVT ValVT = VA.getValVT();
7830 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7831 /*IsImmutable=*/true);
7832 SDValue FIN = DAG.getFrameIndex(
7834
7835 ISD::LoadExtType ExtType;
7836 switch (VA.getLocInfo()) {
7837 default:
7838 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7839 case CCValAssign::Full:
7840 case CCValAssign::Indirect:
7841 case CCValAssign::BCvt:
7842 ExtType = ISD::NON_EXTLOAD;
7843 break;
7844 }
7845 return DAG.getExtLoad(
7846 ExtType, DL, VA.getLocVT(), Chain, FIN,
7847 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
7848}
7849
7850static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
7851 const CCValAssign &VA,
7852 const CCValAssign &HiVA,
7853 const SDLoc &DL) {
7854 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7855 "Unexpected VA");
7856 MachineFunction &MF = DAG.getMachineFunction();
7857 MachineFrameInfo &MFI = MF.getFrameInfo();
7858 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7859
7860 assert(VA.isRegLoc() && "Expected register VA assignment");
7861
7862 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7863 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7864 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7865 SDValue Hi;
7866 if (HiVA.isMemLoc()) {
7867 // Second half of f64 is passed on the stack.
7868 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7869 /*IsImmutable=*/true);
7870 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7871 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7872 MachinePointerInfo::getFixedStack(MF, FI));
7873 } else {
7874 // Second half of f64 is passed in another GPR.
7875 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7876 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7877 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7878 }
7879 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7880}
7881
7882static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
7883 const CCValAssign &VA, const SDLoc &DL) {
7884 EVT LocVT = VA.getLocVT();
7885
7886 switch (VA.getLocInfo()) {
7887 default:
7888 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7889 case CCValAssign::Full:
7890 break;
7891 case CCValAssign::BCvt:
7892 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7893 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7894 else
7895 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7896 break;
7897 }
7898 return Val;
7899}
7900
7901static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7902 CCValAssign::LocInfo LocInfo,
7903 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7904 CCState &State) {
7905 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7906 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7907 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7908 static const MCPhysReg GPRList[] = {
7909 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7910 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7911 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7912 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7913 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7914 return false;
7915 }
7916 }
7917
7918 if (LocVT == MVT::f32) {
7919 // Pass in STG registers: F1, F2, F3, F4
7920 // fs0,fs1,fs2,fs3
7921 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7922 LoongArch::F26, LoongArch::F27};
7923 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7924 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7925 return false;
7926 }
7927 }
7928
7929 if (LocVT == MVT::f64) {
7930 // Pass in STG registers: D1, D2, D3, D4
7931 // fs4,fs5,fs6,fs7
7932 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7933 LoongArch::F30_64, LoongArch::F31_64};
7934 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7935 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7936 return false;
7937 }
7938 }
7939
7940 report_fatal_error("No registers left in GHC calling convention");
7941 return true;
7942}
7943
7944// Transform physical registers into virtual registers.
7945SDValue LoongArchTargetLowering::LowerFormalArguments(
7946 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7947 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7948 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7949
7950 MachineFunction &MF = DAG.getMachineFunction();
7951
7952 switch (CallConv) {
7953 default:
7954 llvm_unreachable("Unsupported calling convention");
7955 case CallingConv::C:
7956 case CallingConv::Fast:
7958 break;
7959 case CallingConv::GHC:
7960 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7961 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7963 "GHC calling convention requires the F and D extensions");
7964 }
7965
7966 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7967 MVT GRLenVT = Subtarget.getGRLenVT();
7968 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7969 // Used with varargs to accumulate store chains.
7970 std::vector<SDValue> OutChains;
7971
7972 // Assign locations to all of the incoming arguments.
7973 SmallVector<CCValAssign> ArgLocs;
7974 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7975
7976 if (CallConv == CallingConv::GHC)
7977 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
7978 else
7979 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7980
7981 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7982 CCValAssign &VA = ArgLocs[i];
7983 SDValue ArgValue;
7984 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7985 // case.
7986 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7987 assert(VA.needsCustom());
7988 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7989 } else if (VA.isRegLoc())
7990 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7991 else
7992 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7993 if (VA.getLocInfo() == CCValAssign::Indirect) {
7994 // If the original argument was split and passed by reference, we need to
7995 // load all parts of it here (using the same address).
7996 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7997 MachinePointerInfo()));
7998 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7999 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8000 assert(ArgPartOffset == 0);
8001 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8002 CCValAssign &PartVA = ArgLocs[i + 1];
8003 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8004 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8005 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8006 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8007 MachinePointerInfo()));
8008 ++i;
8009 ++InsIdx;
8010 }
8011 continue;
8012 }
8013 InVals.push_back(ArgValue);
8014 }
8015
8016 if (IsVarArg) {
8017 ArrayRef<MCPhysReg> ArgRegs(ArgGPRs);
8018 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8019 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8020 MachineFrameInfo &MFI = MF.getFrameInfo();
8021 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8022 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8023
8024 // Offset of the first variable argument from stack pointer, and size of
8025 // the vararg save area. For now, the varargs save area is either zero or
8026 // large enough to hold a0-a7.
8027 int VaArgOffset, VarArgsSaveSize;
8028
8029 // If all registers are allocated, then all varargs must be passed on the
8030 // stack and we don't need to save any argregs.
8031 if (ArgRegs.size() == Idx) {
8032 VaArgOffset = CCInfo.getStackSize();
8033 VarArgsSaveSize = 0;
8034 } else {
8035 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8036 VaArgOffset = -VarArgsSaveSize;
8037 }
8038
8039 // Record the frame index of the first variable argument
8040 // which is a value necessary to VASTART.
8041 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8042 LoongArchFI->setVarArgsFrameIndex(FI);
8043
8044 // If saving an odd number of registers then create an extra stack slot to
8045 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8046 // offsets to even-numbered registers remain 2*GRLen-aligned.
8047 if (Idx % 2) {
8048 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8049 true);
8050 VarArgsSaveSize += GRLenInBytes;
8051 }
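// For example, on LA64 with the first three argument GPRs taken by named
// arguments (Idx == 3), a3-a7 are spilled (40 bytes) and one extra 8-byte slot
// is added so the save area stays a multiple of 2*GRLen (48 bytes in total).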
8052
8053 // Copy the integer registers that may have been used for passing varargs
8054 // to the vararg save area.
8055 for (unsigned I = Idx; I < ArgRegs.size();
8056 ++I, VaArgOffset += GRLenInBytes) {
8057 const Register Reg = RegInfo.createVirtualRegister(RC);
8058 RegInfo.addLiveIn(ArgRegs[I], Reg);
8059 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8060 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8061 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8062 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8063 MachinePointerInfo::getFixedStack(MF, FI));
8064 cast<StoreSDNode>(Store.getNode())
8065 ->getMemOperand()
8066 ->setValue((Value *)nullptr);
8067 OutChains.push_back(Store);
8068 }
8069 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8070 }
8071
8072 // All stores are grouped in one node to allow the matching between
8073 // the size of Ins and InVals. This only happens for vararg functions.
8074 if (!OutChains.empty()) {
8075 OutChains.push_back(Chain);
8076 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8077 }
8078
8079 return Chain;
8080}
8081
8082bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
8083 return CI->isTailCall();
8084}
8085
8086// Check if the return value is used as only a return value, as otherwise
8087// we can't perform a tail-call.
8088bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
8089 SDValue &Chain) const {
8090 if (N->getNumValues() != 1)
8091 return false;
8092 if (!N->hasNUsesOfValue(1, 0))
8093 return false;
8094
8095 SDNode *Copy = *N->user_begin();
8096 if (Copy->getOpcode() != ISD::CopyToReg)
8097 return false;
8098
8099 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8100 // isn't safe to perform a tail call.
8101 if (Copy->getGluedNode())
8102 return false;
8103
8104 // The copy must be used by a LoongArchISD::RET, and nothing else.
8105 bool HasRet = false;
8106 for (SDNode *Node : Copy->users()) {
8107 if (Node->getOpcode() != LoongArchISD::RET)
8108 return false;
8109 HasRet = true;
8110 }
8111
8112 if (!HasRet)
8113 return false;
8114
8115 Chain = Copy->getOperand(0);
8116 return true;
8117}
8118
8119// Check whether the call is eligible for tail call optimization.
8120bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8121 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8122 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8123
8124 auto CalleeCC = CLI.CallConv;
8125 auto &Outs = CLI.Outs;
8126 auto &Caller = MF.getFunction();
8127 auto CallerCC = Caller.getCallingConv();
8128
8129 // Do not tail call opt if the stack is used to pass parameters.
8130 if (CCInfo.getStackSize() != 0)
8131 return false;
8132
8133 // Do not tail call opt if any parameters need to be passed indirectly.
8134 for (auto &VA : ArgLocs)
8135 if (VA.getLocInfo() == CCValAssign::Indirect)
8136 return false;
8137
8138 // Do not tail call opt if either caller or callee uses struct return
8139 // semantics.
8140 auto IsCallerStructRet = Caller.hasStructRetAttr();
8141 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8142 if (IsCallerStructRet || IsCalleeStructRet)
8143 return false;
8144
8145 // Do not tail call opt if either the callee or caller has a byval argument.
8146 for (auto &Arg : Outs)
8147 if (Arg.Flags.isByVal())
8148 return false;
8149
8150 // The callee has to preserve all registers the caller needs to preserve.
8151 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8152 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8153 if (CalleeCC != CallerCC) {
8154 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8155 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8156 return false;
8157 }
8158 return true;
8159}
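// For example, a call in tail position whose arguments all fit in argument
// registers is accepted here, whereas a call that needs stack space for its
// outgoing arguments, passes a byval aggregate, passes any argument
// indirectly, or involves struct-return semantics falls back to a normal
// call.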
8160
8162 return DAG.getDataLayout().getPrefTypeAlign(
8163 VT.getTypeForEVT(*DAG.getContext()));
8164}
8165
8166// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8167// and output parameter nodes.
8168SDValue
8170 SmallVectorImpl<SDValue> &InVals) const {
8171 SelectionDAG &DAG = CLI.DAG;
8172 SDLoc &DL = CLI.DL;
8174 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8176 SDValue Chain = CLI.Chain;
8177 SDValue Callee = CLI.Callee;
8178 CallingConv::ID CallConv = CLI.CallConv;
8179 bool IsVarArg = CLI.IsVarArg;
8180 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8181 MVT GRLenVT = Subtarget.getGRLenVT();
8182 bool &IsTailCall = CLI.IsTailCall;
8183
8185
8186 // Analyze the operands of the call, assigning locations to each operand.
8188 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8189
8190 if (CallConv == CallingConv::GHC)
8191 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8192 else
8193 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8194
8195 // Check if it's really possible to do a tail call.
8196 if (IsTailCall)
8197 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8198
8199 if (IsTailCall)
8200 ++NumTailCalls;
8201 else if (CLI.CB && CLI.CB->isMustTailCall())
8202 report_fatal_error("failed to perform tail call elimination on a call "
8203 "site marked musttail");
8204
8205 // Get a count of how many bytes are to be pushed on the stack.
8206 unsigned NumBytes = ArgCCInfo.getStackSize();
8207
8208 // Create local copies for byval args.
8209 SmallVector<SDValue> ByValArgs;
8210 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8211 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8212 if (!Flags.isByVal())
8213 continue;
8214
8215 SDValue Arg = OutVals[i];
8216 unsigned Size = Flags.getByValSize();
8217 Align Alignment = Flags.getNonZeroByValAlign();
8218
8219 int FI =
8220 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8221 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8222 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8223
8224 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8225 /*IsVolatile=*/false,
8226 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8228 ByValArgs.push_back(FIPtr);
8229 }
8230
8231 if (!IsTailCall)
8232 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8233
8234 // Copy argument values to their designated locations.
8236 SmallVector<SDValue> MemOpChains;
8237 SDValue StackPtr;
8238 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8239 ++i, ++OutIdx) {
8240 CCValAssign &VA = ArgLocs[i];
8241 SDValue ArgValue = OutVals[OutIdx];
8242 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8243
8244 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8245 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8246 assert(VA.isRegLoc() && "Expected register VA assignment");
8247 assert(VA.needsCustom());
8248 SDValue SplitF64 =
8250 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8251 SDValue Lo = SplitF64.getValue(0);
8252 SDValue Hi = SplitF64.getValue(1);
8253
8254 Register RegLo = VA.getLocReg();
8255 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8256
8257 // Get the CCValAssign for the Hi part.
8258 CCValAssign &HiVA = ArgLocs[++i];
8259
8260 if (HiVA.isMemLoc()) {
8261 // Second half of f64 is passed on the stack.
8262 if (!StackPtr.getNode())
8263 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8265 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8266 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8267 // Emit the store.
8268 MemOpChains.push_back(DAG.getStore(
8269 Chain, DL, Hi, Address,
8271 } else {
8272 // Second half of f64 is passed in another GPR.
8273 Register RegHigh = HiVA.getLocReg();
8274 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8275 }
8276 continue;
8277 }
8278
8279 // Promote the value if needed.
8280 // For now, only handle fully promoted and indirect arguments.
8281 if (VA.getLocInfo() == CCValAssign::Indirect) {
8282 // Store the argument in a stack slot and pass its address.
8283 Align StackAlign =
8284 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8285 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8286 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8287 // If the original argument was split and passed by reference, we need to
8288 // store the required parts of it here (and pass just one address).
8289 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8290 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8291 assert(ArgPartOffset == 0);
8292 // Calculate the total size to store. We don't have access to what we're
8293 // actually storing other than performing the loop and collecting the
8294 // info.
8296 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8297 SDValue PartValue = OutVals[OutIdx + 1];
8298 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8299 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8300 EVT PartVT = PartValue.getValueType();
8301
8302 StoredSize += PartVT.getStoreSize();
8303 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8304 Parts.push_back(std::make_pair(PartValue, Offset));
8305 ++i;
8306 ++OutIdx;
8307 }
8308 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8309 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8310 MemOpChains.push_back(
8311 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8313 for (const auto &Part : Parts) {
8314 SDValue PartValue = Part.first;
8315 SDValue PartOffset = Part.second;
8317 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8318 MemOpChains.push_back(
8319 DAG.getStore(Chain, DL, PartValue, Address,
8321 }
8322 ArgValue = SpillSlot;
8323 } else {
8324 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8325 }
8326
8327 // Use local copy if it is a byval arg.
8328 if (Flags.isByVal())
8329 ArgValue = ByValArgs[j++];
8330
8331 if (VA.isRegLoc()) {
8332 // Queue up the argument copies and emit them at the end.
8333 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8334 } else {
8335 assert(VA.isMemLoc() && "Argument not register or memory");
8336 assert(!IsTailCall && "Tail call not allowed if stack is used "
8337 "for passing parameters");
8338
8339 // Work out the address of the stack slot.
8340 if (!StackPtr.getNode())
8341 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8343 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8345
8346 // Emit the store.
8347 MemOpChains.push_back(
8348 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8349 }
8350 }
8351
8352 // Join the stores, which are independent of one another.
8353 if (!MemOpChains.empty())
8354 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8355
8356 SDValue Glue;
8357
8358 // Build a sequence of copy-to-reg nodes, chained and glued together.
8359 for (auto &Reg : RegsToPass) {
8360 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8361 Glue = Chain.getValue(1);
8362 }
8363
8364 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8365 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8366 // split it and then direct call can be matched by PseudoCALL.
8368 const GlobalValue *GV = S->getGlobal();
8369 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8372 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8373 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8374 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8377 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8378 }
8379
8380 // The first call operand is the chain and the second is the target address.
8382 Ops.push_back(Chain);
8383 Ops.push_back(Callee);
8384
8385 // Add argument registers to the end of the list so that they are
8386 // known live into the call.
8387 for (auto &Reg : RegsToPass)
8388 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8389
8390 if (!IsTailCall) {
8391 // Add a register mask operand representing the call-preserved registers.
8392 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8393 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8394 assert(Mask && "Missing call preserved mask for calling convention");
8395 Ops.push_back(DAG.getRegisterMask(Mask));
8396 }
8397
8398 // Glue the call to the argument copies, if any.
8399 if (Glue.getNode())
8400 Ops.push_back(Glue);
8401
8402 // Emit the call.
8403 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8404 unsigned Op;
8405 switch (DAG.getTarget().getCodeModel()) {
8406 default:
8407 report_fatal_error("Unsupported code model");
8408 case CodeModel::Small:
8409 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8410 break;
8411 case CodeModel::Medium:
8412 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8414 break;
8415 case CodeModel::Large:
8416 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8418 break;
8419 }
8420
8421 if (IsTailCall) {
8423 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8424 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8425 return Ret;
8426 }
8427
8428 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8429 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8430 Glue = Chain.getValue(1);
8431
8432 // Mark the end of the call, which is glued to the call itself.
8433 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8434 Glue = Chain.getValue(1);
8435
8436 // Assign locations to each value returned by this call.
8438 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8439 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8440
8441 // Copy all of the result registers out of their specified physreg.
8442 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8443 auto &VA = RVLocs[i];
8444 // Copy the value out.
8445 SDValue RetValue =
8446 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8447 // Glue the RetValue to the end of the call sequence.
8448 Chain = RetValue.getValue(1);
8449 Glue = RetValue.getValue(2);
8450
8451 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8452 assert(VA.needsCustom());
8453 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8454 MVT::i32, Glue);
8455 Chain = RetValue2.getValue(1);
8456 Glue = RetValue2.getValue(2);
8457 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8458 RetValue, RetValue2);
8459 } else
8460 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8461
8462 InVals.push_back(RetValue);
8463 }
8464
8465 return Chain;
8466}
8467
8469 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8470 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8471 const Type *RetTy) const {
8473 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8474
8475 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8476 LoongArchABI::ABI ABI =
8477 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8478 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8479 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8480 return false;
8481 }
8482 return true;
8483}
8484
8486 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8488 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8489 SelectionDAG &DAG) const {
8490 // Stores the assignment of the return value to a location.
8492
8493 // Info about the registers and stack slot.
8494 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8495 *DAG.getContext());
8496
8497 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8498 nullptr, CC_LoongArch);
8499 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8500 report_fatal_error("GHC functions return void only");
8501 SDValue Glue;
8502 SmallVector<SDValue, 4> RetOps(1, Chain);
8503
8504 // Copy the result values into the output registers.
8505 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8506 SDValue Val = OutVals[OutIdx];
8507 CCValAssign &VA = RVLocs[i];
8508 assert(VA.isRegLoc() && "Can only return in registers!");
8509
8510 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8511 // Handle returning f64 on LA32D with a soft float ABI.
8512 assert(VA.isRegLoc() && "Expected return via registers");
8513 assert(VA.needsCustom());
8515 DAG.getVTList(MVT::i32, MVT::i32), Val);
8516 SDValue Lo = SplitF64.getValue(0);
8517 SDValue Hi = SplitF64.getValue(1);
8518 Register RegLo = VA.getLocReg();
8519 Register RegHi = RVLocs[++i].getLocReg();
8520
8521 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8522 Glue = Chain.getValue(1);
8523 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8524 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8525 Glue = Chain.getValue(1);
8526 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8527 } else {
8528 // Handle a 'normal' return.
8529 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8530 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8531
8532 // Guarantee that all emitted copies are stuck together.
8533 Glue = Chain.getValue(1);
8534 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8535 }
8536 }
8537
8538 RetOps[0] = Chain; // Update chain.
8539
8540 // Add the glue node if we have it.
8541 if (Glue.getNode())
8542 RetOps.push_back(Glue);
8543
8544 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8545}
8546
8547// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8548// Note: The following prefixes are excluded:
8549// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8550// as they can be represented using [x]vrepli.[whb]
8552 const APInt &SplatValue, const unsigned SplatBitSize) const {
8553 uint64_t RequiredImm = 0;
8554 uint64_t V = SplatValue.getZExtValue();
8555 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8556 // 4'b0101
8557 RequiredImm = (0b10101 << 8) | (V >> 8);
8558 return {true, RequiredImm};
8559 } else if (SplatBitSize == 32) {
8560 // 4'b0001
8561 if (!(V & 0xFFFF00FF)) {
8562 RequiredImm = (0b10001 << 8) | (V >> 8);
8563 return {true, RequiredImm};
8564 }
8565 // 4'b0010
8566 if (!(V & 0xFF00FFFF)) {
8567 RequiredImm = (0b10010 << 8) | (V >> 16);
8568 return {true, RequiredImm};
8569 }
8570 // 4'b0011
8571 if (!(V & 0x00FFFFFF)) {
8572 RequiredImm = (0b10011 << 8) | (V >> 24);
8573 return {true, RequiredImm};
8574 }
8575 // 4'b0110
8576 if ((V & 0xFFFF00FF) == 0xFF) {
8577 RequiredImm = (0b10110 << 8) | (V >> 8);
8578 return {true, RequiredImm};
8579 }
8580 // 4'b0111
8581 if ((V & 0xFF00FFFF) == 0xFFFF) {
8582 RequiredImm = (0b10111 << 8) | (V >> 16);
8583 return {true, RequiredImm};
8584 }
8585 // 4'b1010
8586 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8587 RequiredImm =
8588 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8589 return {true, RequiredImm};
8590 }
8591 } else if (SplatBitSize == 64) {
8592 // 4'b1011
8593 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8594 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8595 RequiredImm =
8596 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8597 return {true, RequiredImm};
8598 }
8599 // 4'b1100
8600 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8601 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8602 RequiredImm =
8603 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8604 return {true, RequiredImm};
8605 }
8606 // 4'b1001
8607 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8608 uint8_t res = 0;
8609 for (int i = 0; i < 8; ++i) {
8610 uint8_t byte = x & 0xFF;
8611 if (byte == 0 || byte == 0xFF)
8612 res |= ((byte & 1) << i);
8613 else
8614 return {false, 0};
8615 x >>= 8;
8616 }
8617 return {true, res};
8618 };
8619 auto [IsSame, Suffix] = sameBitsPreByte(V);
8620 if (IsSame) {
8621 RequiredImm = (0b11001 << 8) | Suffix;
8622 return {true, RequiredImm};
8623 }
8624 }
8625 return {false, RequiredImm};
8626}
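// For example, a v4i32 splat of 0x0000AB00 matches the 4'b0001 case above
// (only byte 1 is nonzero), so RequiredImm == (0b10001 << 8) | 0xAB == 0x11AB,
// i.e. [x]vldi with imm[12] == 1, imm[11:8] == 0b0001 and imm[7:0] == 0xAB.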
8627
8629 EVT VT) const {
8630 if (!Subtarget.hasExtLSX())
8631 return false;
8632
8633 if (VT == MVT::f32) {
8634 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8635 return (masked == 0x3e000000 || masked == 0x40000000);
8636 }
8637
8638 if (VT == MVT::f64) {
8639 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8640 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8641 }
8642
8643 return false;
8644}
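// For example, -0.5f (bits 0xBF000000) and 3.0f (bits 0x40400000) pass the
// f32 check above, since masking with 0x7E07FFFF yields 0x3E000000 and
// 0x40000000 respectively, while 0.1f (bits 0x3DCCCCCD) is rejected because
// its low mantissa bits are nonzero.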
8645
8646bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8647 bool ForCodeSize) const {
8648 // TODO: Maybe need more checks here after vector extension is supported.
8649 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8650 return false;
8651 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8652 return false;
8653 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8654}
8655
8657 return true;
8658}
8659
8661 return true;
8662}
8663
8664bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8665 const Instruction *I) const {
8666 if (!Subtarget.is64Bit())
8667 return isa<LoadInst>(I) || isa<StoreInst>(I);
8668
8669 if (isa<LoadInst>(I))
8670 return true;
8671
8672 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8673 // require fences because we can use amswap_db.[w/d].
8674 Type *Ty = I->getOperand(0)->getType();
8675 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8676 unsigned Size = Ty->getIntegerBitWidth();
8677 return (Size == 8 || Size == 16);
8678 }
8679
8680 return false;
8681}
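// For example, on LA32 every atomic load and store is bracketed with fences,
// while on LA64 only atomic loads and sub-word (i8/i16) integer atomic stores
// need them; i32/i64 atomic stores can be lowered to amswap_db.w/d directly.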
8682
8684 LLVMContext &Context,
8685 EVT VT) const {
8686 if (!VT.isVector())
8687 return getPointerTy(DL);
8689}
8690
8692 EVT VT = Y.getValueType();
8693
8694 if (VT.isVector())
8695 return Subtarget.hasExtLSX() && VT.isInteger();
8696
8697 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
8698}
8699
8701 const CallInst &I,
8702 MachineFunction &MF,
8703 unsigned Intrinsic) const {
8704 switch (Intrinsic) {
8705 default:
8706 return false;
8707 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8708 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8709 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8710 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8711 Info.opc = ISD::INTRINSIC_W_CHAIN;
8712 Info.memVT = MVT::i32;
8713 Info.ptrVal = I.getArgOperand(0);
8714 Info.offset = 0;
8715 Info.align = Align(4);
8718 return true;
8719 // TODO: Add more Intrinsics later.
8720 }
8721}
8722
8723// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
8724// atomicrmw and/or/xor operations with operands narrower than 32 bits cannot
8725// be expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
8726// regression, we need to implement the expansion manually.
8729
8731 Op == AtomicRMWInst::And) &&
8732 "Unable to expand");
8733 unsigned MinWordSize = 4;
8734
8735 IRBuilder<> Builder(AI);
8736 LLVMContext &Ctx = Builder.getContext();
8737 const DataLayout &DL = AI->getDataLayout();
8738 Type *ValueType = AI->getType();
8739 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8740
8741 Value *Addr = AI->getPointerOperand();
8742 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8743 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8744
8745 Value *AlignedAddr = Builder.CreateIntrinsic(
8746 Intrinsic::ptrmask, {PtrTy, IntTy},
8747 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8748 "AlignedAddr");
8749
8750 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8751 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8752 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8753 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8754 Value *Mask = Builder.CreateShl(
8755 ConstantInt::get(WordType,
8756 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8757 ShiftAmt, "Mask");
8758 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8759 Value *ValOperand_Shifted =
8760 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8761 ShiftAmt, "ValOperand_Shifted");
8762 Value *NewOperand;
8763 if (Op == AtomicRMWInst::And)
8764 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8765 else
8766 NewOperand = ValOperand_Shifted;
8767
8768 AtomicRMWInst *NewAI =
8769 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8770 AI->getOrdering(), AI->getSyncScopeID());
8771
8772 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8773 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8774 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8775 AI->replaceAllUsesWith(FinalOldResult);
8776 AI->eraseFromParent();
8777}
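// For example, an i8 `atomicrmw and` whose address has low bits 0b10 becomes a
// 32-bit atomicrmw on AlignedAddr with ShiftAmt == 16, Mask == 0x00FF0000 and
// NewOperand == (zext(val) << 16) | 0xFF00FFFF, so the other three bytes of
// the word are ANDed with all-ones and remain unchanged; for or/xor the
// untouched bytes are simply zero in NewOperand.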
8778
8781 // TODO: Add more AtomicRMWInst that needs to be extended.
8782
8783 // Since floating-point operation requires a non-trivial set of data
8784 // operations, use CmpXChg to expand.
8785 if (AI->isFloatingPointOperation() ||
8791
8792 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8795 AI->getOperation() == AtomicRMWInst::Sub)) {
8797 }
8798
8799 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8800 if (Subtarget.hasLAMCAS()) {
8801 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8805 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8807 }
8808
8809 if (Size == 8 || Size == 16)
8812}
8813
8814static Intrinsic::ID
8816 AtomicRMWInst::BinOp BinOp) {
8817 if (GRLen == 64) {
8818 switch (BinOp) {
8819 default:
8820 llvm_unreachable("Unexpected AtomicRMW BinOp");
8822 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8823 case AtomicRMWInst::Add:
8824 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8825 case AtomicRMWInst::Sub:
8826 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8828 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8830 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8832 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8833 case AtomicRMWInst::Max:
8834 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8835 case AtomicRMWInst::Min:
8836 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8837 // TODO: support other AtomicRMWInst.
8838 }
8839 }
8840
8841 if (GRLen == 32) {
8842 switch (BinOp) {
8843 default:
8844 llvm_unreachable("Unexpected AtomicRMW BinOp");
8846 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8847 case AtomicRMWInst::Add:
8848 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8849 case AtomicRMWInst::Sub:
8850 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8852 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8854 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8856 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8857 case AtomicRMWInst::Max:
8858 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8859 case AtomicRMWInst::Min:
8860 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8861 // TODO: support other AtomicRMWInst.
8862 }
8863 }
8864
8865 llvm_unreachable("Unexpected GRLen\n");
8866}
8867
8870 AtomicCmpXchgInst *CI) const {
8871
8872 if (Subtarget.hasLAMCAS())
8874
8876 if (Size == 8 || Size == 16)
8879}
8880
8882 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8883 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8884 unsigned GRLen = Subtarget.getGRLen();
8885 AtomicOrdering FailOrd = CI->getFailureOrdering();
8886 Value *FailureOrdering =
8887 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8888 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8889 if (GRLen == 64) {
8890 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8891 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8892 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8893 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8894 }
8895 Type *Tys[] = {AlignedAddr->getType()};
8896 Value *Result = Builder.CreateIntrinsic(
8897 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8898 if (GRLen == 64)
8899 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8900 return Result;
8901}
8902
8904 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8905 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8906 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8907 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8908 // mask, as this produces better code than the LL/SC loop emitted by
8909 // int_loongarch_masked_atomicrmw_xchg.
8910 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8913 if (CVal->isZero())
8914 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8915 Builder.CreateNot(Mask, "Inv_Mask"),
8916 AI->getAlign(), Ord);
8917 if (CVal->isMinusOne())
8918 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8919 AI->getAlign(), Ord);
8920 }
8921
8922 unsigned GRLen = Subtarget.getGRLen();
8923 Value *Ordering =
8924 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8925 Type *Tys[] = {AlignedAddr->getType()};
8927 AI->getModule(),
8929
8930 if (GRLen == 64) {
8931 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8932 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8933 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8934 }
8935
8936 Value *Result;
8937
8938 // Must pass the shift amount needed to sign extend the loaded value prior
8939 // to performing a signed comparison for min/max. ShiftAmt is the number of
8940 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8941 // is the number of bits to left+right shift the value in order to
8942 // sign-extend.
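  // For example, with GRLen == 64, an i8 field at byte offset 2 has
  // ShiftAmt == 16 and ValWidth == 8, so SextShamt == 40: shifting the loaded
  // word left by 40 moves the field's sign bit to bit 63, and an arithmetic
  // right shift by 40 then yields the sign-extended field for the comparison.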
8943 if (AI->getOperation() == AtomicRMWInst::Min ||
8945 const DataLayout &DL = AI->getDataLayout();
8946 unsigned ValWidth =
8947 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8948 Value *SextShamt =
8949 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8950 Result = Builder.CreateCall(LlwOpScwLoop,
8951 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8952 } else {
8953 Result =
8954 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8955 }
8956
8957 if (GRLen == 64)
8958 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8959 return Result;
8960}
8961
8963 const MachineFunction &MF, EVT VT) const {
8964 VT = VT.getScalarType();
8965
8966 if (!VT.isSimple())
8967 return false;
8968
8969 switch (VT.getSimpleVT().SimpleTy) {
8970 case MVT::f32:
8971 case MVT::f64:
8972 return true;
8973 default:
8974 break;
8975 }
8976
8977 return false;
8978}
8979
8981 const Constant *PersonalityFn) const {
8982 return LoongArch::R4;
8983}
8984
8986 const Constant *PersonalityFn) const {
8987 return LoongArch::R5;
8988}
8989
8990//===----------------------------------------------------------------------===//
8991// Target Optimization Hooks
8992//===----------------------------------------------------------------------===//
8993
8995 const LoongArchSubtarget &Subtarget) {
8996 // Instructions from the FRECIPE feature have a relative accuracy of 2^-14.
8997 // IEEE float has 23 significand bits and double has 52.
8998 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8999 return RefinementSteps;
9000}
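// Each refinement step roughly doubles the number of accurate bits, so one
// step is enough to cover the f32 significand (2 * 14 > 24) and two steps are
// needed for f64 (4 * 14 > 53).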
9001
9003 SelectionDAG &DAG, int Enabled,
9004 int &RefinementSteps,
9005 bool &UseOneConstNR,
9006 bool Reciprocal) const {
9007 if (Subtarget.hasFrecipe()) {
9008 SDLoc DL(Operand);
9009 EVT VT = Operand.getValueType();
9010
9011 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9012 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9013 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9014 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9015 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9016
9017 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9018 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9019
9020 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9021 if (Reciprocal)
9022 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9023
9024 return Estimate;
9025 }
9026 }
9027
9028 return SDValue();
9029}
9030
9032 SelectionDAG &DAG,
9033 int Enabled,
9034 int &RefinementSteps) const {
9035 if (Subtarget.hasFrecipe()) {
9036 SDLoc DL(Operand);
9037 EVT VT = Operand.getValueType();
9038
9039 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9040 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9041 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9042 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9043 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9044
9045 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9046 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9047
9048 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9049 }
9050 }
9051
9052 return SDValue();
9053}
9054
9055//===----------------------------------------------------------------------===//
9056// LoongArch Inline Assembly Support
9057//===----------------------------------------------------------------------===//
9058
9060LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9061 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9062 //
9063 // 'f': A floating-point register (if available).
9064 // 'k': A memory operand whose address is formed by a base register and
9065 // (optionally scaled) index register.
9066 // 'l': A signed 16-bit constant.
9067 // 'm': A memory operand whose address is formed by a base register and
9068 // offset that is suitable for use in instructions with the same
9069 // addressing mode as st.w and ld.w.
9070 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9071 // instruction)
9072 // 'I': A signed 12-bit constant (for arithmetic instructions).
9073 // 'J': Integer zero.
9074 // 'K': An unsigned 12-bit constant (for logic instructions).
9075 // "ZB": An address that is held in a general-purpose register. The offset is
9076 // zero.
9077 // "ZC": A memory operand whose address is formed by a base register and
9078 // offset that is suitable for use in instructions with the same
9079 // addressing mode as ll.w and sc.w.
9080 if (Constraint.size() == 1) {
9081 switch (Constraint[0]) {
9082 default:
9083 break;
9084 case 'f':
9085 case 'q':
9086 return C_RegisterClass;
9087 case 'l':
9088 case 'I':
9089 case 'J':
9090 case 'K':
9091 return C_Immediate;
9092 case 'k':
9093 return C_Memory;
9094 }
9095 }
9096
9097 if (Constraint == "ZC" || Constraint == "ZB")
9098 return C_Memory;
9099
9100 // 'm' is handled here.
9101 return TargetLowering::getConstraintType(Constraint);
9102}
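// An illustrative use of these constraints from C (assuming the usual GCC
// inline-assembly syntax) might look like:
//   int Res;
//   asm("addi.w %0, %1, %2" : "=r"(Res) : "r"(X), "I"(12));
// where 'r' selects a general-purpose register and 'I' requires the constant
// to fit in a signed 12-bit immediate.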
9103
9104InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9105 StringRef ConstraintCode) const {
9106 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9110 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9111}
9112
9113std::pair<unsigned, const TargetRegisterClass *>
9114LoongArchTargetLowering::getRegForInlineAsmConstraint(
9115 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9116 // First, see if this is a constraint that directly corresponds to a LoongArch
9117 // register class.
9118 if (Constraint.size() == 1) {
9119 switch (Constraint[0]) {
9120 case 'r':
9121 // TODO: Support fixed vectors up to GRLen?
9122 if (VT.isVector())
9123 break;
9124 return std::make_pair(0U, &LoongArch::GPRRegClass);
9125 case 'q':
9126 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9127 case 'f':
9128 if (Subtarget.hasBasicF() && VT == MVT::f32)
9129 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9130 if (Subtarget.hasBasicD() && VT == MVT::f64)
9131 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9132 if (Subtarget.hasExtLSX() &&
9133 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9134 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9135 if (Subtarget.hasExtLASX() &&
9136 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9137 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9138 break;
9139 default:
9140 break;
9141 }
9142 }
9143
9144 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9145 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9146 // constraints while the official register name is prefixed with a '$'. So we
9147 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
9148 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
9149 // case insensitive, so no need to convert the constraint to upper case here.
9150 //
9151 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9152 // decode the usage of register name aliases into their official names. And
9153 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9154 // official register names.
9155 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9156 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9157 bool IsFP = Constraint[2] == 'f';
9158 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9159 std::pair<unsigned, const TargetRegisterClass *> R;
9161 TRI, join_items("", Temp.first, Temp.second), VT);
9162 // Match those names to the widest floating point register type available.
9163 if (IsFP) {
9164 unsigned RegNo = R.first;
9165 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9166 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9167 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9168 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9169 }
9170 }
9171 }
9172 return R;
9173 }
9174
9175 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9176}
9177
9178void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9179 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9180 SelectionDAG &DAG) const {
9181 // Currently only support length 1 constraints.
9182 if (Constraint.size() == 1) {
9183 switch (Constraint[0]) {
9184 case 'l':
9185 // Validate & create a 16-bit signed immediate operand.
9186 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9187 uint64_t CVal = C->getSExtValue();
9188 if (isInt<16>(CVal))
9189 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9190 Subtarget.getGRLenVT()));
9191 }
9192 return;
9193 case 'I':
9194 // Validate & create a 12-bit signed immediate operand.
9195 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9196 uint64_t CVal = C->getSExtValue();
9197 if (isInt<12>(CVal))
9198 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9199 Subtarget.getGRLenVT()));
9200 }
9201 return;
9202 case 'J':
9203 // Validate & create an integer zero operand.
9204 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9205 if (C->getZExtValue() == 0)
9206 Ops.push_back(
9207 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9208 return;
9209 case 'K':
9210 // Validate & create a 12-bit unsigned immediate operand.
9211 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9212 uint64_t CVal = C->getZExtValue();
9213 if (isUInt<12>(CVal))
9214 Ops.push_back(
9215 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9216 }
9217 return;
9218 default:
9219 break;
9220 }
9221 }
9223}
9224
9225#define GET_REGISTER_MATCHER
9226#include "LoongArchGenAsmMatcher.inc"
9227
9230 const MachineFunction &MF) const {
9231 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9232 std::string NewRegName = Name.second.str();
9233 Register Reg = MatchRegisterAltName(NewRegName);
9234 if (!Reg)
9235 Reg = MatchRegisterName(NewRegName);
9236 if (!Reg)
9237 return Reg;
9238 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9239 if (!ReservedRegs.test(Reg))
9240 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9241 StringRef(RegName) + "\"."));
9242 return Reg;
9243}
9244
9246 EVT VT, SDValue C) const {
9247 // TODO: Support vectors.
9248 if (!VT.isScalarInteger())
9249 return false;
9250
9251 // Omit the optimization if the data size exceeds GRLen.
9252 if (VT.getSizeInBits() > Subtarget.getGRLen())
9253 return false;
9254
9255 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9256 const APInt &Imm = ConstNode->getAPIntValue();
9257 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9258 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9259 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9260 return true;
9261 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9262 if (ConstNode->hasOneUse() &&
9263 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9264 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9265 return true;
9266 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9267 // in which the immediate has two set bits. Or Break (MUL x, imm)
9268 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9269 // equals to (1 << s0) - (1 << s1).
9270 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9271 unsigned Shifts = Imm.countr_zero();
9272 // Reject immediates which can be composed via a single LUI.
9273 if (Shifts >= 12)
9274 return false;
9275 // Reject multiplications can be optimized to
9276 // (SLLI (ALSL x, x, 1/2/3/4), s).
9277 APInt ImmPop = Imm.ashr(Shifts);
9278 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9279 return false;
9280 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9281 // since it needs one more instruction than other 3 cases.
9282 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9283 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9284 (ImmSmall - Imm).isPowerOf2())
9285 return true;
9286 }
9287 }
9288
9289 return false;
9290}
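// For example, MUL x, 7 is worth decomposing as (SUB (SLLI x, 3), x) via the
// (Imm + 1).isPowerOf2() case, and a single-use MUL x, 10 as an ALSL/SLLI
// pair via the (Imm - 2).isPowerOf2() case, while MUL x, 6144 (3 << 11) is
// rejected because it is already handled as (SLLI (ALSL x, x, 1), 11).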
9291
9293 const AddrMode &AM,
9294 Type *Ty, unsigned AS,
9295 Instruction *I) const {
9296 // LoongArch has four basic addressing modes:
9297 // 1. reg
9298 // 2. reg + 12-bit signed offset
9299 // 3. reg + 14-bit signed offset left-shifted by 2
9300 // 4. reg1 + reg2
9301 // TODO: Add more checks after support vector extension.
9302
9303 // No global is ever allowed as a base.
9304 if (AM.BaseGV)
9305 return false;
9306
9307 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9308 // with `UAL` feature.
9309 if (!isInt<12>(AM.BaseOffs) &&
9310 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9311 return false;
9312
9313 switch (AM.Scale) {
9314 case 0:
9315 // "r+i" or just "i", depending on HasBaseReg.
9316 break;
9317 case 1:
9318 // "r+r+i" is not allowed.
9319 if (AM.HasBaseReg && AM.BaseOffs)
9320 return false;
9321 // Otherwise we have "r+r" or "r+i".
9322 break;
9323 case 2:
9324 // "2*r+r" or "2*r+i" is not allowed.
9325 if (AM.HasBaseReg || AM.BaseOffs)
9326 return false;
9327 // Allow "2*r" as "r+r".
9328 break;
9329 default:
9330 return false;
9331 }
9332
9333 return true;
9334}
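// For example, a base register plus offset 2044 is always accepted (it fits a
// signed 12-bit immediate), offset 2048 is accepted only when the UAL feature
// is available (it needs the 14-bit offset scaled by 4), and offset 2049 is
// rejected outright.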
9335
9337 return isInt<12>(Imm);
9338}
9339
9341 return isInt<12>(Imm);
9342}
9343
9345 // Zexts are free if they can be combined with a load.
9346 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9347 // poorly with type legalization of compares preferring sext.
9348 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9349 EVT MemVT = LD->getMemoryVT();
9350 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9351 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9352 LD->getExtensionType() == ISD::ZEXTLOAD))
9353 return true;
9354 }
9355
9356 return TargetLowering::isZExtFree(Val, VT2);
9357}
9358
9360 EVT DstVT) const {
9361 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9362}
9363
9365 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9366}
9367
9369 // TODO: Support vectors.
9370 if (Y.getValueType().isVector())
9371 return false;
9372
9373 return !isa<ConstantSDNode>(Y);
9374}
9375
9377 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9378 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9379}
9380
9382 Type *Ty, bool IsSigned) const {
9383 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9384 return true;
9385
9386 return IsSigned;
9387}
9388
9390 // Return false to suppress the unnecessary extensions if the LibCall
9391 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9392 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9393 Type.getSizeInBits() < Subtarget.getGRLen()))
9394 return false;
9395 return true;
9396}
9397
9398// memcpy and other memory intrinsics typically try to use wider loads/stores
9399// if the source/dest is aligned and the copy size is large enough. We therefore
9400// want to align such objects passed to memory intrinsics.
9402 unsigned &MinSize,
9403 Align &PrefAlign) const {
9404 if (!isa<MemIntrinsic>(CI))
9405 return false;
9406
9407 if (Subtarget.is64Bit()) {
9408 MinSize = 8;
9409 PrefAlign = Align(8);
9410 } else {
9411 MinSize = 4;
9412 PrefAlign = Align(4);
9413 }
9414
9415 return true;
9416}
9417
9426
9427bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9428 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9429 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9430 bool IsABIRegCopy = CC.has_value();
9431 EVT ValueVT = Val.getValueType();
9432
9433 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9434 PartVT == MVT::f32) {
9435 // Cast the [b]f16 to i16, extend to i32, pad the high bits with ones to
9436 // make a float NaN, and cast to f32.
9437 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9438 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9439 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9440 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9441 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9442 Parts[0] = Val;
9443 return true;
9444 }
9445
9446 return false;
9447}
9448
9449SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9450 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9451 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9452 bool IsABIRegCopy = CC.has_value();
9453
9454 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9455 PartVT == MVT::f32) {
9456 SDValue Val = Parts[0];
9457
9458 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9459 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9460 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9461 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9462 return Val;
9463 }
9464
9465 return SDValue();
9466}
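// For example, an f16 argument with bits 0x3C00 (1.0) is passed as the f32 bit
// pattern 0xFFFF3C00, a NaN whose low 16 bits carry the half value; the
// receiving side bitcasts to i32, truncates to i16 and bitcasts back to
// [b]f16.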
9467
9468MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9469 CallingConv::ID CC,
9470 EVT VT) const {
9471 // Use f32 to pass f16.
9472 if (VT == MVT::f16 && Subtarget.hasBasicF())
9473 return MVT::f32;
9474
9476}
9477
9478unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9479 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9480 // Use f32 to pass f16.
9481 if (VT == MVT::f16 && Subtarget.hasBasicF())
9482 return 1;
9483
9485}
9486
9488 SDValue Op, const APInt &OriginalDemandedBits,
9489 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9490 unsigned Depth) const {
9491 EVT VT = Op.getValueType();
9492 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9493 unsigned Opc = Op.getOpcode();
9494 switch (Opc) {
9495 default:
9496 break;
9499 SDValue Src = Op.getOperand(0);
9500 MVT SrcVT = Src.getSimpleValueType();
9501 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9502 unsigned NumElts = SrcVT.getVectorNumElements();
9503
9504 // If we don't need the sign bits at all just return zero.
9505 if (OriginalDemandedBits.countr_zero() >= NumElts)
9506 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9507
9508 // Only demand the vector elements of the sign bits we need.
9509 APInt KnownUndef, KnownZero;
9510 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9511 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9512 TLO, Depth + 1))
9513 return true;
9514
9515 Known.Zero = KnownZero.zext(BitWidth);
9516 Known.Zero.setHighBits(BitWidth - NumElts);
9517
9518 // [X]VMSKLTZ only uses the MSB from each vector element.
9519 KnownBits KnownSrc;
9520 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9521 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9522 Depth + 1))
9523 return true;
9524
9525 if (KnownSrc.One[SrcBits - 1])
9526 Known.One.setLowBits(NumElts);
9527 else if (KnownSrc.Zero[SrcBits - 1])
9528 Known.Zero.setLowBits(NumElts);
9529
9530 // Attempt to avoid multi-use ops if we don't need anything from it.
9532 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9533 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9534 return false;
9535 }
9536 }
9537
9539 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9540}
9541
9543 unsigned Opc = VecOp.getOpcode();
9544
9545 // Assume target opcodes can't be scalarized.
9546 // TODO - do we have any exceptions?
9547 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9548 return false;
9549
9550 // If the vector op is not supported, try to convert to scalar.
9551 EVT VecVT = VecOp.getValueType();
9553 return true;
9554
9555 // If the vector op is supported, but the scalar op is not, the transform may
9556 // not be worthwhile.
9557 EVT ScalarVT = VecVT.getScalarType();
9558 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9559}
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
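The lowerVECTOR_SHUFFLE_* helpers and the lower256BitShuffle dispatcher indexed above follow a common pattern: each helper inspects the mask and returns an empty SDValue when its instruction does not apply, and the dispatcher tries the candidates in order. A minimal sketch of that shape only; the ordering and fallback shown here are assumptions, not the actual dispatch order in lower256BitShuffle:

// Illustrative sketch: the "try each pattern, first hit wins" shape of a
// shuffle-lowering dispatcher. Helper names and signatures come from the
// index above; the ordering chosen here is an assumption.
static SDValue dispatchShuffleSketch(const SDLoc &DL, ArrayRef<int> Mask,
                                     MVT VT, SDValue V1, SDValue V2,
                                     SelectionDAG &DAG,
                                     const LoongArchSubtarget &Subtarget) {
  // Each helper returns SDValue() if Mask does not match its pattern.
  if (SDValue V =
          lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget))
    return V;
  if (SDValue V = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG))
    return V;
  if (SDValue V = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG))
    return V;
  // Fall back to the generic XVSHUF lowering when no cheaper form matched.
  return lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask, VT, V1, V2, DAG);
}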
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1432
bool isZero() const
Definition APFloat.h:1445
APInt bitcastToAPInt() const
Definition APFloat.h:1353
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the bit at the given "bitPosition" to 1.
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
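The APInt entries above cover the bit-manipulation helpers that the combines and shuffle matchers lean on. A small self-contained usage sketch, unrelated to any specific routine in this file:

#include "llvm/ADT/APInt.h"
using namespace llvm;

void apintSketch() {
  // A 32-bit value with every bit set, then zero-extended to 64 bits.
  APInt AllOnes = APInt::getAllOnes(32);
  APInt Wide = AllOnes.zext(64);       // low 32 bits set, high 32 bits clear
  (void)Wide.countr_zero();            // 0: the lowest bit is set
  (void)Wide.isAllOnes();              // false after zero-extension

  // Build a mask with only the low 8 bits set and test containment.
  APInt Mask = APInt::getZero(64);
  Mask.setLowBits(8);
  bool Subset = Mask.isSubsetOf(Wide); // true: bits 0..7 are set in Wide
  (void)Subset;
}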
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
An instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
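The AtomicRMWInst::BinOp values above are the operations that shouldExpandAtomicRMWInIR and emitMaskedAtomicRMWIntrinsic have to handle. For reference, a minimal sketch of creating such an instruction through IRBuilder; the helper name and the alignment are assumptions, not code from this backend:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: emit "atomicrmw add ptr %Ptr, i32 %Val seq_cst".
Value *emitAtomicAddSketch(IRBuilderBase &Builder, Value *Ptr, Value *Val) {
  return Builder.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val,
                                 MaybeAlign(4),
                                 AtomicOrdering::SequentiallyConsistent);
}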
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
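The CCState/CCValAssign entries above are the calling-convention bookkeeping used by the argument and return lowering hooks. A hedged sketch of the usual flow; AssignFn stands in for a real CCAssignFn (such as one of the CC_LoongArch* routines), and the loop bodies are placeholders:

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative shape of a LowerFormalArguments-style analysis pass over Ins.
static void analyzeArgsSketch(CallingConv::ID CallConv, bool IsVarArg,
                              MachineFunction &MF,
                              const SmallVectorImpl<ISD::InputArg> &Ins,
                              SelectionDAG &DAG, CCAssignFn AssignFn) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, AssignFn);

  for (CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // In-register argument: materialize a copy from VA.getLocReg().
    } else {
      // Stack argument: load from the frame slot at VA.getLocMemOffset().
    }
  }
}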
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
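The overrides listed above include the custom-lowering entry points (LowerOperation, ReplaceNodeResults), which conventionally dispatch on the node opcode. A generic sketch of that shape only; the class name, the per-opcode helpers and the case list are illustrative, not the LoongArch implementation:

// Generic shape of a LowerOperation override. Everything named *Sketch is
// hypothetical; real targets switch over the opcodes they marked Custom.
SDValue HypotheticalTargetLowering::LowerOperation(SDValue Op,
                                                   SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::GlobalAddress:
    return lowerGlobalAddressSketch(Op, DAG); // hypothetical helper
  case ISD::VECTOR_SHUFFLE:
    return lowerVectorShuffleSketch(Op, DAG); // hypothetical helper
  default:
    llvm_unreachable("unexpected custom-lowered operation");
  }
}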
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
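The MachineInstrBuilder methods above (addReg, addImm, addMBB) are the building blocks used by custom-inserter hooks such as emitPseudoVMSKCOND when expanding pseudos. A hedged sketch of the BuildMI idiom; the opcode, registers and blocks are assumed inputs rather than anything defined in this file:

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// Hypothetical expansion step: before MI, emit "DstReg = Opcode SrcReg, 0"
// followed by a branch (BrOpcode) to TargetMBB.
static void buildMISketch(MachineBasicBlock &MBB, MachineInstr &MI,
                          const TargetInstrInfo &TII, unsigned Opcode,
                          Register DstReg, Register SrcReg, unsigned BrOpcode,
                          MachineBasicBlock *TargetMBB) {
  const DebugLoc &DL = MI.getDebugLoc();
  BuildMI(MBB, MI, DL, TII.get(Opcode), DstReg).addReg(SrcReg).addImm(0);
  BuildMI(MBB, MI, DL, TII.get(BrOpcode)).addMBB(TargetMBB);
}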
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
Get the SDNode which holds the desired result.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
ArrayRef< int > getMask() const
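The SelectionDAG entries above form the node-construction API used throughout the lowering and combine routines. A small illustrative sketch of typical calls; the values and types are parameters of the sketch, not taken from this file:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative only: build (add X, 7), bitcast a vector, and shuffle it.
static SDValue dagBuildSketch(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                              SDValue X, EVT VecVT, SDValue Vec,
                              ArrayRef<int> Mask) {
  SDValue Seven = DAG.getConstant(7, DL, VT);
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Seven);
  (void)Sum;

  SDValue Cast = DAG.getBitcast(VecVT, Vec);
  return DAG.getVectorShuffle(VecVT, DL, Cast, DAG.getUNDEF(VecVT), Mask);
}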
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
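A rough sketch of how these two condition-code helpers behave (the values below are illustrative, not taken from this file):
  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i64);  // !(X < Y) -> SETGE
  ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);   // (Y < X)  -> SETGT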
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
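A minimal sketch of using these two predicates on an arbitrary operand V (a hypothetical SDValue):
  if (ISD::isBuildVectorAllZeros(V.getNode()))
    ; // every element is 0 or undef
  else if (ISD::isBuildVectorAllOnes(V.getNode()))
    ; // every element is ~0 or undef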
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
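A hedged sketch of looking up an overloaded intrinsic declaration; Module *M, IRBuilder<> Builder and Value *Arg are assumed to exist in the surrounding code:
  Function *FabsF32 = Intrinsic::getOrInsertDeclaration(
      M, Intrinsic::fabs, {Type::getFloatTy(M->getContext())});
  Value *Res = Builder.CreateCall(FabsF32, {Arg}); // |Arg| as an f32 call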
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
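A small sketch of how these libcall lookups are typically used (the types are chosen only for illustration):
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f32, MVT::i64);
  if (LC != RTLIB::UNKNOWN_LIBCALL)
    ; // the conversion can be lowered to a runtime library call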
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
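A minimal sketch of the range form, assuming an SDNode *N is in scope:
  bool AllUndef = all_of(N->op_values(),
                         [](SDValue Op) { return Op.isUndef(); });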
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
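A tiny example of the signed-range check; the 12-bit width is illustrative only:
  int64_t Imm = -2048;
  bool Fits = isInt<12>(Imm); // true: -2048 .. 2047 fits a signed 12-bit field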
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
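A sketch of the usual casting idioms on a hypothetical SDNode *N:
  if (auto *C = dyn_cast<ConstantSDNode>(N)) { // null if N is not a ConstantSDNode
    uint64_t Imm = C->getZExtValue();
    (void)Imm;
  }
  if (isa<LoadSDNode>(N))   // type test only
    cast<LoadSDNode>(N);    // asserts if the dynamic type is wrong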
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or an FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
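A hedged sketch of widening a shuffle mask by a factor of 2 (the mask values are illustrative):
  SmallVector<int> WideMask;
  int Mask[] = {2, 3, 0, 1};
  if (widenShuffleMaskElts(2, Mask, WideMask))
    ; // WideMask is now {1, 0}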
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
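A short example of the usual pairing of these two helpers when strength-reducing a multiply (the constant is hypothetical):
  uint64_t C = 16;
  if (isPowerOf2_64(C)) {
    unsigned ShAmt = Log2_64(C); // 4, so x * 16 can be rewritten as x << 4
    (void)ShAmt;
  }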
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
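A few illustrative uses of these bit-pattern predicates (the constants are chosen arbitrarily):
  bool A = isMask_64(0x00ff);        // true: contiguous ones starting at bit 0
  bool B = isShiftedMask_64(0x0ff0); // true: contiguous ones, shifted left
  bool C = isUInt<8>(255);           // true: fits an unsigned 8-bit field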
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
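A minimal example of the scaled-immediate check; the width and shift are illustrative:
  int64_t Offset = 1024;
  bool Ok = isShiftedInt<14, 2>(Offset); // true: a signed 14-bit value shifted left by 2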
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
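A sketch of classifying a hypothetical SDValue V with these three predicates:
  if (isNullConstant(V))
    ; // constant 0
  else if (isOneConstant(V))
    ; // constant 1
  else if (isAllOnesConstant(V))
    ; // all bits set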
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
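A compact sketch of a few of the EVT queries listed above, applied to an illustrative type:
  EVT VT = MVT::v4i32;
  bool IsVec = VT.isVector();              // true
  unsigned NumElts = VT.getVectorNumElements(); // 4
  uint64_t EltBits = VT.getScalarSizeInBits();  // 32
  bool Is128 = VT.is128BitVector();        // true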
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
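A one-line sketch, assuming a MachineFunction &MF and a frame index FI are available in the surrounding code:
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);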
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...