1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "loongarch-isel-lowering"
42
43STATISTIC(NumTailCalls, "Number of tail calls");
44
53
55 "loongarch-materialize-float-imm", cl::Hidden,
56 cl::desc("Maximum number of instructions used (including code sequence "
57 "to generate the value and moving the value to FPR) when "
58 "materializing floating-point immediates (default = 3)"),
60 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62 "Materialize FP immediate within 2 instructions"),
64 "Materialize FP immediate within 3 instructions"),
66 "Materialize FP immediate within 4 instructions"),
68 "Materialize FP immediate within 5 instructions"),
70 "Materialize FP immediate within 6 instructions "
71 "(behaves same as 5 on loongarch64)")));
72
73static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
74 cl::desc("Trap on integer division by zero."),
75 cl::init(false));
76
78 const LoongArchSubtarget &STI)
79 : TargetLowering(TM), Subtarget(STI) {
80
81 MVT GRLenVT = Subtarget.getGRLenVT();
82
83 // Set up the register classes.
84
85 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
86 if (Subtarget.hasBasicF())
87 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
88 if (Subtarget.hasBasicD())
89 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
90
91 static const MVT::SimpleValueType LSXVTs[] = {
92 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
93 static const MVT::SimpleValueType LASXVTs[] = {
94 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
95
96 if (Subtarget.hasExtLSX())
97 for (MVT VT : LSXVTs)
98 addRegisterClass(VT, &LoongArch::LSX128RegClass);
99
100 if (Subtarget.hasExtLASX())
101 for (MVT VT : LASXVTs)
102 addRegisterClass(VT, &LoongArch::LASX256RegClass);
103
104 // Set operations for LA32 and LA64.
105
107 MVT::i1, Promote);
108
115
118 GRLenVT, Custom);
119
121
122 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
123 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
126
127 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
128 setOperationAction(ISD::TRAP, MVT::Other, Legal);
129
133
134 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
135
136 // BITREV/REVB requires the 32S feature.
137 if (STI.has32S()) {
138 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
139 // we know which of sll and revb.2h is faster.
142
143 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
144 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
145 // and i32 could still be byte-swapped relatively cheaply.
147 } else {
155 }
156
157 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
159 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
163
166
167 // Set operations for LA64 only.
168
169 if (Subtarget.is64Bit()) {
176 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
187
191 Custom);
192 setOperationAction(ISD::LROUND, MVT::i32, Custom);
193 }
194
195 // Set operations for LA32 only.
196
197 if (!Subtarget.is64Bit()) {
203 if (Subtarget.hasBasicD())
204 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
205 }
206
207 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
208
209 static const ISD::CondCode FPCCToExpand[] = {
212
213 // Set operations for 'F' feature.
214
215 if (Subtarget.hasBasicF()) {
216 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
217 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
218 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
219 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
220 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
221
224 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
226 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
227 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
228 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
229 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
234 setOperationAction(ISD::FSIN, MVT::f32, Expand);
235 setOperationAction(ISD::FCOS, MVT::f32, Expand);
236 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
237 setOperationAction(ISD::FPOW, MVT::f32, Expand);
239 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
240 Subtarget.isSoftFPABI() ? LibCall : Custom);
241 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
242 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
244 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
245 Subtarget.isSoftFPABI() ? LibCall : Custom);
246
247 if (Subtarget.is64Bit())
248 setOperationAction(ISD::FRINT, MVT::f32, Legal);
249
250 if (!Subtarget.hasBasicD()) {
252 if (Subtarget.is64Bit()) {
255 }
256 }
257 }
258
259 // Set operations for 'D' feature.
260
261 if (Subtarget.hasBasicD()) {
262 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
265 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
268 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
269
272 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
276 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
277 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
278 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
280 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
282 setOperationAction(ISD::FSIN, MVT::f64, Expand);
283 setOperationAction(ISD::FCOS, MVT::f64, Expand);
284 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
285 setOperationAction(ISD::FPOW, MVT::f64, Expand);
287 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
288 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
289 Subtarget.isSoftFPABI() ? LibCall : Custom);
290 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
291 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
292 Subtarget.isSoftFPABI() ? LibCall : Custom);
293
294 if (Subtarget.is64Bit())
295 setOperationAction(ISD::FRINT, MVT::f64, Legal);
296 }
297
298 // Set operations for 'LSX' feature.
299
300 if (Subtarget.hasExtLSX()) {
302 // Expand all truncating stores and extending loads.
303 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
304 setTruncStoreAction(VT, InnerVT, Expand);
307 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
308 }
309 // By default everything must be expanded. Then we will selectively turn
310 // on ones that can be effectively codegen'd.
311 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
313 }
314
315 for (MVT VT : LSXVTs) {
316 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
317 setOperationAction(ISD::BITCAST, VT, Legal);
319
323
328 }
329 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
332 Legal);
334 VT, Legal);
341 Expand);
349 }
350 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
352 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
354 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
357 }
358 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
362 setOperationAction(ISD::FSQRT, VT, Legal);
363 setOperationAction(ISD::FNEG, VT, Legal);
366 VT, Expand);
368 }
370 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
371 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
372 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
373 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
374
375 for (MVT VT :
376 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
377 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
379 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
380 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
381 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
382 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
383 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
384 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
385 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
386 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
387 }
388 }
389
390 // Set operations for 'LASX' feature.
391
392 if (Subtarget.hasExtLASX()) {
393 for (MVT VT : LASXVTs) {
394 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
395 setOperationAction(ISD::BITCAST, VT, Legal);
397
403
407 }
408 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
411 Legal);
413 VT, Legal);
420 Expand);
428 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
429 }
430 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
432 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
434 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
437 }
438 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
442 setOperationAction(ISD::FSQRT, VT, Legal);
443 setOperationAction(ISD::FNEG, VT, Legal);
446 VT, Expand);
448 }
449 }
450
451 // Set DAG combine for LA32 and LA64.
452
457
458 // Set DAG combine for 'LSX' feature.
459
460 if (Subtarget.hasExtLSX()) {
462 setTargetDAGCombine(ISD::BITCAST);
463 }
464
465 // Set DAG combine for 'LASX' feature.
466
467 if (Subtarget.hasExtLASX())
469
470 // Compute derived properties from the register classes.
471 computeRegisterProperties(Subtarget.getRegisterInfo());
472
474
477
478 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
479
481
482 // Function alignments.
484 // Set preferred alignments.
485 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
486 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
487 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
488
489 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
490 if (Subtarget.hasLAMCAS())
492
493 if (Subtarget.hasSCQ()) {
495 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
496 }
497}
498
500 const GlobalAddressSDNode *GA) const {
501 // In order to maximise the opportunity for common subexpression elimination,
502 // keep a separate ADD node for the global address offset instead of folding
503 // it in the global address node. Later peephole optimisations may choose to
504 // fold it back in when profitable.
505 return false;
506}
507
509 SelectionDAG &DAG) const {
510 switch (Op.getOpcode()) {
511 case ISD::ATOMIC_FENCE:
512 return lowerATOMIC_FENCE(Op, DAG);
514 return lowerEH_DWARF_CFA(Op, DAG);
516 return lowerGlobalAddress(Op, DAG);
518 return lowerGlobalTLSAddress(Op, DAG);
520 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
522 return lowerINTRINSIC_W_CHAIN(Op, DAG);
524 return lowerINTRINSIC_VOID(Op, DAG);
526 return lowerBlockAddress(Op, DAG);
527 case ISD::JumpTable:
528 return lowerJumpTable(Op, DAG);
529 case ISD::SHL_PARTS:
530 return lowerShiftLeftParts(Op, DAG);
531 case ISD::SRA_PARTS:
532 return lowerShiftRightParts(Op, DAG, true);
533 case ISD::SRL_PARTS:
534 return lowerShiftRightParts(Op, DAG, false);
536 return lowerConstantPool(Op, DAG);
537 case ISD::FP_TO_SINT:
538 return lowerFP_TO_SINT(Op, DAG);
539 case ISD::BITCAST:
540 return lowerBITCAST(Op, DAG);
541 case ISD::UINT_TO_FP:
542 return lowerUINT_TO_FP(Op, DAG);
543 case ISD::SINT_TO_FP:
544 return lowerSINT_TO_FP(Op, DAG);
545 case ISD::VASTART:
546 return lowerVASTART(Op, DAG);
547 case ISD::FRAMEADDR:
548 return lowerFRAMEADDR(Op, DAG);
549 case ISD::RETURNADDR:
550 return lowerRETURNADDR(Op, DAG);
552 return lowerWRITE_REGISTER(Op, DAG);
554 return lowerINSERT_VECTOR_ELT(Op, DAG);
556 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
558 return lowerBUILD_VECTOR(Op, DAG);
560 return lowerCONCAT_VECTORS(Op, DAG);
562 return lowerVECTOR_SHUFFLE(Op, DAG);
563 case ISD::BITREVERSE:
564 return lowerBITREVERSE(Op, DAG);
566 return lowerSCALAR_TO_VECTOR(Op, DAG);
567 case ISD::PREFETCH:
568 return lowerPREFETCH(Op, DAG);
569 case ISD::SELECT:
570 return lowerSELECT(Op, DAG);
571 case ISD::BRCOND:
572 return lowerBRCOND(Op, DAG);
573 case ISD::FP_TO_FP16:
574 return lowerFP_TO_FP16(Op, DAG);
575 case ISD::FP16_TO_FP:
576 return lowerFP16_TO_FP(Op, DAG);
577 case ISD::FP_TO_BF16:
578 return lowerFP_TO_BF16(Op, DAG);
579 case ISD::BF16_TO_FP:
580 return lowerBF16_TO_FP(Op, DAG);
581 case ISD::VECREDUCE_ADD:
582 return lowerVECREDUCE_ADD(Op, DAG);
583 case ISD::VECREDUCE_AND:
584 case ISD::VECREDUCE_OR:
585 case ISD::VECREDUCE_XOR:
586 case ISD::VECREDUCE_SMAX:
587 case ISD::VECREDUCE_SMIN:
588 case ISD::VECREDUCE_UMAX:
589 case ISD::VECREDUCE_UMIN:
590 return lowerVECREDUCE(Op, DAG);
591 case ISD::ConstantFP:
592 return lowerConstantFP(Op, DAG);
593 }
594 return SDValue();
595}
596
597SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
598 SelectionDAG &DAG) const {
599 EVT VT = Op.getValueType();
601 const APFloat &FPVal = CFP->getValueAPF();
602 SDLoc DL(CFP);
603
604 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
605 (VT == MVT::f64 && Subtarget.hasBasicD()));
606
607 // If the value is 0.0 or -0.0, leave it to the default lowering.
608 if (FPVal.isZero())
609 return SDValue();
610
611 // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
612 if (isFPImmVLDILegal(FPVal, VT))
613 return SDValue();
614
615 // Construct the value as an integer and move it to a float register.
616 APInt INTVal = FPVal.bitcastToAPInt();
617
618 // If more than MaterializeFPImmInsNum instructions would be needed to
619 // generate INTVal and move it to a float register, fall back to a
620 // floating-point load from the constant pool.
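// A rough illustration with the default budget of 3: an f64 immediate on LA64
// whose bit pattern needs a 3-instruction integer sequence costs 3 + 1 (the
// move to FPR) = 4 instructions, so it falls back to the constant pool; +1.0
// is exempt from this budget.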
622 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
623 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
624 return SDValue();
625
626 switch (VT.getSimpleVT().SimpleTy) {
627 default:
628 llvm_unreachable("Unexpected floating point type!");
629 break;
630 case MVT::f32: {
631 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
632 if (Subtarget.is64Bit())
633 NewVal = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, NewVal);
634 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
636 DL, VT, NewVal);
637 }
638 case MVT::f64: {
639 if (Subtarget.is64Bit()) {
640 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
641 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
642 }
643 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
644 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
645 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
646 }
647 }
648
649 return SDValue();
650}
651
652// Lower vecreduce_add using vhaddw instructions.
653// For example:
654// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
655// can be lowered to:
656// VHADDW_D_W vr0, vr0, vr0
657// VHADDW_Q_D vr0, vr0, vr0
658// VPICKVE2GR_D a0, vr0, 0
659// ADDI_W a0, a0, 0
660SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
661 SelectionDAG &DAG) const {
662
663 SDLoc DL(Op);
664 MVT OpVT = Op.getSimpleValueType();
665 SDValue Val = Op.getOperand(0);
666
667 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
668 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
669
670 unsigned LegalVecSize = 128;
671 bool isLASX256Vector =
672 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
673
674 // Widen the operand type until it is legal.
675 while (!isTypeLegal(Val.getSimpleValueType())) {
676 Val = DAG.WidenVector(Val, DL);
677 }
678
679 // NumEles is the iteration count; v4i32 for LSX and v8i32 for LASX
680 // should iterate the same number of times.
681 if (isLASX256Vector) {
682 NumEles /= 2;
683 LegalVecSize = 256;
684 }
685
686 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
687 MVT IntTy = MVT::getIntegerVT(EleBits);
688 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
689 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
690 }
691
692 if (isLASX256Vector) {
693 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
694 DAG.getConstant(2, DL, MVT::i64));
695 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
696 }
697
698 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
699 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
700}
701
702// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
703// For example:
704// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
705// can be lowered to:
706// VBSRL_V vr1, vr0, 8
707// VMAX_W vr0, vr1, vr0
708// VBSRL_V vr1, vr0, 4
709// VMAX_W vr0, vr1, vr0
710// VPICKVE2GR_W a0, vr0, 0
711// A 256-bit vector is illegal here and is split into two 128-bit vectors
712// by default, which are then processed by this lowering.
713SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
714 SelectionDAG &DAG) const {
715 SDLoc DL(Op);
716
717 MVT OpVT = Op.getSimpleValueType();
718 SDValue Val = Op.getOperand(0);
719
720 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
721 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
722
723 // Widen the operand type until it is legal.
724 while (!isTypeLegal(Val.getSimpleValueType())) {
725 Val = DAG.WidenVector(Val, DL);
726 }
727
728 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
729 MVT VecTy = Val.getSimpleValueType();
730
731 for (int i = NumEles; i > 1; i /= 2) {
732 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
733 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
734 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
735 }
736
737 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
738 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
739}
740
741SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
742 SelectionDAG &DAG) const {
743 unsigned IsData = Op.getConstantOperandVal(4);
744
745 // We don't support non-data prefetch.
746 // Just preserve the chain.
747 if (!IsData)
748 return Op.getOperand(0);
749
750 return Op;
751}
752
753// Return true if Val is equal to (setcc LHS, RHS, CC).
754// Return false if Val is the inverse of (setcc LHS, RHS, CC).
755// Otherwise, return std::nullopt.
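// Two illustrative cases:
//   matchSetCC(a, b, SETLT, (setcc b, a, SETGT)) returns true (swapped operands),
//   matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns false (inverted cond).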
756static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
757 ISD::CondCode CC, SDValue Val) {
758 assert(Val->getOpcode() == ISD::SETCC);
759 SDValue LHS2 = Val.getOperand(0);
760 SDValue RHS2 = Val.getOperand(1);
761 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
762
763 if (LHS == LHS2 && RHS == RHS2) {
764 if (CC == CC2)
765 return true;
766 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
767 return false;
768 } else if (LHS == RHS2 && RHS == LHS2) {
770 if (CC == CC2)
771 return true;
772 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
773 return false;
774 }
775
776 return std::nullopt;
777}
778
780 const LoongArchSubtarget &Subtarget) {
781 SDValue CondV = N->getOperand(0);
782 SDValue TrueV = N->getOperand(1);
783 SDValue FalseV = N->getOperand(2);
784 MVT VT = N->getSimpleValueType(0);
785 SDLoc DL(N);
786
787 // (select c, -1, y) -> -c | y
788 if (isAllOnesConstant(TrueV)) {
789 SDValue Neg = DAG.getNegative(CondV, DL, VT);
790 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
791 }
792 // (select c, y, -1) -> (c-1) | y
793 if (isAllOnesConstant(FalseV)) {
794 SDValue Neg =
795 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
796 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
797 }
798
799 // (select c, 0, y) -> (c-1) & y
800 if (isNullConstant(TrueV)) {
801 SDValue Neg =
802 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
803 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
804 }
805 // (select c, y, 0) -> -c & y
806 if (isNullConstant(FalseV)) {
807 SDValue Neg = DAG.getNegative(CondV, DL, VT);
808 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
809 }
810
811 // select c, ~x, x --> xor -c, x
812 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
813 const APInt &TrueVal = TrueV->getAsAPIntVal();
814 const APInt &FalseVal = FalseV->getAsAPIntVal();
815 if (~TrueVal == FalseVal) {
816 SDValue Neg = DAG.getNegative(CondV, DL, VT);
817 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
818 }
819 }
820
821 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
822 // when both truev and falsev are also setcc.
823 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
824 FalseV.getOpcode() == ISD::SETCC) {
825 SDValue LHS = CondV.getOperand(0);
826 SDValue RHS = CondV.getOperand(1);
827 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
828
829 // (select x, x, y) -> x | y
830 // (select !x, x, y) -> x & y
831 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
832 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
833 DAG.getFreeze(FalseV));
834 }
835 // (select x, y, x) -> x & y
836 // (select !x, y, x) -> x | y
837 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
838 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
839 DAG.getFreeze(TrueV), FalseV);
840 }
841 }
842
843 return SDValue();
844}
845
846// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
847// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
848// For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
849// being `0` or `-1`. In such cases we can replace `select` with `and`.
850// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
851// than `c0`?
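// An illustrative case: (add (select cond, x, -1), 1) becomes
// (select cond, (add x, 1), 0), and the resulting select-with-zero is then
// turned into an AND of the negated condition by combineSelectToBinOp.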
852static SDValue
854 const LoongArchSubtarget &Subtarget) {
855 unsigned SelOpNo = 0;
856 SDValue Sel = BO->getOperand(0);
857 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
858 SelOpNo = 1;
859 Sel = BO->getOperand(1);
860 }
861
862 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
863 return SDValue();
864
865 unsigned ConstSelOpNo = 1;
866 unsigned OtherSelOpNo = 2;
867 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
868 ConstSelOpNo = 2;
869 OtherSelOpNo = 1;
870 }
871 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
872 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
873 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
874 return SDValue();
875
876 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
877 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
878 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
879 return SDValue();
880
881 SDLoc DL(Sel);
882 EVT VT = BO->getValueType(0);
883
884 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
885 if (SelOpNo == 1)
886 std::swap(NewConstOps[0], NewConstOps[1]);
887
888 SDValue NewConstOp =
889 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
890 if (!NewConstOp)
891 return SDValue();
892
893 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
894 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
895 return SDValue();
896
897 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
898 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
899 if (SelOpNo == 1)
900 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
901 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
902
903 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
904 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
905 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
906}
907
908// Changes the condition code and swaps operands if necessary, so the SetCC
909// operation matches one of the comparisons supported directly by branches
910// in the LoongArch ISA. May adjust compares to favor compare with 0 over
911// compare with 1/-1.
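// For example, (seteq (and x, 0x8000), 0) cannot be handled by ANDI (the mask
// does not fit in a 12-bit immediate), so the tested bit is shifted into the
// sign position and the compare becomes (setge (shl x, GRLen-16), 0).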
913 ISD::CondCode &CC, SelectionDAG &DAG) {
914 // If this is a single bit test that can't be handled by ANDI, shift the
915 // bit to be tested to the MSB and perform a signed compare with 0.
916 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
917 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
918 isa<ConstantSDNode>(LHS.getOperand(1))) {
919 uint64_t Mask = LHS.getConstantOperandVal(1);
920 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
921 unsigned ShAmt = 0;
922 if (isPowerOf2_64(Mask)) {
923 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
924 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
925 } else {
926 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
927 }
928
929 LHS = LHS.getOperand(0);
930 if (ShAmt != 0)
931 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
932 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
933 return;
934 }
935 }
936
937 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
938 int64_t C = RHSC->getSExtValue();
939 switch (CC) {
940 default:
941 break;
942 case ISD::SETGT:
943 // Convert X > -1 to X >= 0.
944 if (C == -1) {
945 RHS = DAG.getConstant(0, DL, RHS.getValueType());
946 CC = ISD::SETGE;
947 return;
948 }
949 break;
950 case ISD::SETLT:
951 // Convert X < 1 to 0 >= X.
952 if (C == 1) {
953 RHS = LHS;
954 LHS = DAG.getConstant(0, DL, RHS.getValueType());
955 CC = ISD::SETGE;
956 return;
957 }
958 break;
959 }
960 }
961
962 switch (CC) {
963 default:
964 break;
965 case ISD::SETGT:
966 case ISD::SETLE:
967 case ISD::SETUGT:
968 case ISD::SETULE:
970 std::swap(LHS, RHS);
971 break;
972 }
973}
974
975SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
976 SelectionDAG &DAG) const {
977 SDValue CondV = Op.getOperand(0);
978 SDValue TrueV = Op.getOperand(1);
979 SDValue FalseV = Op.getOperand(2);
980 SDLoc DL(Op);
981 MVT VT = Op.getSimpleValueType();
982 MVT GRLenVT = Subtarget.getGRLenVT();
983
984 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
985 return V;
986
987 if (Op.hasOneUse()) {
988 unsigned UseOpc = Op->user_begin()->getOpcode();
989 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
990 SDNode *BinOp = *Op->user_begin();
991 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
992 DAG, Subtarget)) {
993 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
994 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
995 // may return a constant node and cause a crash in lowerSELECT.
996 if (NewSel.getOpcode() == ISD::SELECT)
997 return lowerSELECT(NewSel, DAG);
998 return NewSel;
999 }
1000 }
1001 }
1002
1003 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1004 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1005 // (select condv, truev, falsev)
1006 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1007 if (CondV.getOpcode() != ISD::SETCC ||
1008 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1009 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1010 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1011
1012 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1013
1014 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1015 }
1016
1017 // If the CondV is the output of a SETCC node which operates on GRLenVT
1018 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1019 // to take advantage of the integer compare+branch instructions. i.e.: (select
1020 // (setcc lhs, rhs, cc), truev, falsev)
1021 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1022 SDValue LHS = CondV.getOperand(0);
1023 SDValue RHS = CondV.getOperand(1);
1024 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1025
1026 // Special case for a select of 2 constants that have a difference of 1.
1027 // Normally this is done by DAGCombine, but if the select is introduced by
1028 // type legalization or op legalization, we miss it. Restricting to SETLT
1029 // case for now because that is what signed saturating add/sub need.
1030 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1031 // but we would probably want to swap the true/false values if the condition
1032 // is SETGE/SETLE to avoid an XORI.
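// For example, (select (setlt a, b), 5, 4) is emitted as
// (add (setcc a, b, setlt), 4) instead of a SELECT_CC.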
1033 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1034 CCVal == ISD::SETLT) {
1035 const APInt &TrueVal = TrueV->getAsAPIntVal();
1036 const APInt &FalseVal = FalseV->getAsAPIntVal();
1037 if (TrueVal - 1 == FalseVal)
1038 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1039 if (TrueVal + 1 == FalseVal)
1040 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1041 }
1042
1043 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1044 // 1 < x ? x : 1 -> 0 < x ? x : 1
1045 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1046 RHS == TrueV && LHS == FalseV) {
1047 LHS = DAG.getConstant(0, DL, VT);
1048 // 0 <u x is the same as x != 0.
1049 if (CCVal == ISD::SETULT) {
1050 std::swap(LHS, RHS);
1051 CCVal = ISD::SETNE;
1052 }
1053 }
1054
1055 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1056 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1057 RHS == FalseV) {
1058 RHS = DAG.getConstant(0, DL, VT);
1059 }
1060
1061 SDValue TargetCC = DAG.getCondCode(CCVal);
1062
1063 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1064 // (select (setcc lhs, rhs, CC), constant, falsev)
1065 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1066 std::swap(TrueV, FalseV);
1067 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1068 }
1069
1070 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1071 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1072}
1073
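// Lower BRCOND: an integer SETCC condition on GRLenVT is merged into a
// LoongArchISD::BR_CC node; a floating-point SETCC condition is emitted as
// LoongArchISD::BRCOND; any other condition is branched on via
// (setne condition, 0).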
1074SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1075 SelectionDAG &DAG) const {
1076 SDValue CondV = Op.getOperand(1);
1077 SDLoc DL(Op);
1078 MVT GRLenVT = Subtarget.getGRLenVT();
1079
1080 if (CondV.getOpcode() == ISD::SETCC) {
1081 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1082 SDValue LHS = CondV.getOperand(0);
1083 SDValue RHS = CondV.getOperand(1);
1084 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1085
1086 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1087
1088 SDValue TargetCC = DAG.getCondCode(CCVal);
1089 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1090 Op.getOperand(0), LHS, RHS, TargetCC,
1091 Op.getOperand(2));
1092 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1093 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1094 Op.getOperand(0), CondV, Op.getOperand(2));
1095 }
1096 }
1097
1098 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1099 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1100 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1101}
1102
1103SDValue
1104LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1105 SelectionDAG &DAG) const {
1106 SDLoc DL(Op);
1107 MVT OpVT = Op.getSimpleValueType();
1108
1109 SDValue Vector = DAG.getUNDEF(OpVT);
1110 SDValue Val = Op.getOperand(0);
1111 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1112
1113 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1114}
1115
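// Lower vector BITREVERSE by bitcasting to <N x i64>, bit-reversing each i64
// element (BITREV_8B for i8 element types, scalar BITREVERSE otherwise), and
// then shuffling the sub-elements back into place for i16/i32 element types.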
1116SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1117 SelectionDAG &DAG) const {
1118 EVT ResTy = Op->getValueType(0);
1119 SDValue Src = Op->getOperand(0);
1120 SDLoc DL(Op);
1121
1122 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1123 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1124 unsigned int NewEltNum = NewVT.getVectorNumElements();
1125
1126 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1127
1129 for (unsigned int i = 0; i < NewEltNum; i++) {
1130 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1131 DAG.getConstant(i, DL, MVT::i64));
1132 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1133 ? (unsigned)LoongArchISD::BITREV_8B
1134 : (unsigned)ISD::BITREVERSE;
1135 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1136 }
1137 SDValue Res =
1138 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1139
1140 switch (ResTy.getSimpleVT().SimpleTy) {
1141 default:
1142 return SDValue();
1143 case MVT::v16i8:
1144 case MVT::v32i8:
1145 return Res;
1146 case MVT::v8i16:
1147 case MVT::v16i16:
1148 case MVT::v4i32:
1149 case MVT::v8i32: {
1151 for (unsigned int i = 0; i < NewEltNum; i++)
1152 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1153 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1154 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1155 }
1156 }
1157}
1158
1159// Widen element type to get a new mask value (if possible).
1160// For example:
1161// shufflevector <4 x i32> %a, <4 x i32> %b,
1162// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1163// is equivalent to:
1164// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1165// can be lowered to:
1166// VPACKOD_D vr0, vr0, vr1
1168 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1169 unsigned EltBits = VT.getScalarSizeInBits();
1170
1171 if (EltBits > 32 || EltBits == 1)
1172 return SDValue();
1173
1174 SmallVector<int, 8> NewMask;
1175 if (widenShuffleMaskElts(Mask, NewMask)) {
1176 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1177 : MVT::getIntegerVT(EltBits * 2);
1178 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1179 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1180 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1181 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1182 return DAG.getBitcast(
1183 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1184 }
1185 }
1186
1187 return SDValue();
1188}
1189
1190/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1191/// instructions.
1192// The function matches elements from one of the input vectors shuffled to the
1193// left or right with zeroable elements 'shifted in'. It handles both the
1194// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1195// lane.
1196// Mostly copied from X86.
1197static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1198 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1199 int MaskOffset, const APInt &Zeroable) {
1200 int Size = Mask.size();
1201 unsigned SizeInBits = Size * ScalarSizeInBits;
1202
1203 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1204 for (int i = 0; i < Size; i += Scale)
1205 for (int j = 0; j < Shift; ++j)
1206 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1207 return false;
1208
1209 return true;
1210 };
1211
1212 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1213 int Step = 1) {
1214 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1215 if (!(Mask[i] == -1 || Mask[i] == Low))
1216 return false;
1217 return true;
1218 };
1219
1220 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1221 for (int i = 0; i != Size; i += Scale) {
1222 unsigned Pos = Left ? i + Shift : i;
1223 unsigned Low = Left ? i : i + Shift;
1224 unsigned Len = Scale - Shift;
1225 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1226 return -1;
1227 }
1228
1229 int ShiftEltBits = ScalarSizeInBits * Scale;
1230 bool ByteShift = ShiftEltBits > 64;
1231 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1232 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1233 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1234
1235 // Normalize the scale for byte shifts to still produce an i64 element
1236 // type.
1237 Scale = ByteShift ? Scale / 2 : Scale;
1238
1239 // We need to round trip through the appropriate type for the shift.
1240 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1241 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1242 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1243 return (int)ShiftAmt;
1244 };
1245
1246 unsigned MaxWidth = 128;
1247 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1248 for (int Shift = 1; Shift != Scale; ++Shift)
1249 for (bool Left : {true, false})
1250 if (CheckZeros(Shift, Scale, Left)) {
1251 int ShiftAmt = MatchShift(Shift, Scale, Left);
1252 if (0 < ShiftAmt)
1253 return ShiftAmt;
1254 }
1255
1256 // no match
1257 return -1;
1258}
1259
1260/// Lower VECTOR_SHUFFLE as shift (if possible).
1261///
1262/// For example:
1263/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1264/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1265/// is lowered to:
1266/// (VBSLL_V $v0, $v0, 4)
1267///
1268/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1269/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1270/// is lowered to:
1271/// (VSLLI_D $v0, $v0, 32)
1273 MVT VT, SDValue V1, SDValue V2,
1274 SelectionDAG &DAG,
1275 const LoongArchSubtarget &Subtarget,
1276 const APInt &Zeroable) {
1277 int Size = Mask.size();
1278 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1279
1280 MVT ShiftVT;
1281 SDValue V = V1;
1282 unsigned Opcode;
1283
1284 // Try to match shuffle against V1 shift.
1285 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1286 Mask, 0, Zeroable);
1287
1288 // If V1 failed, try to match shuffle against V2 shift.
1289 if (ShiftAmt < 0) {
1290 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1291 Mask, Size, Zeroable);
1292 V = V2;
1293 }
1294
1295 if (ShiftAmt < 0)
1296 return SDValue();
1297
1298 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1299 "Illegal integer vector type");
1300 V = DAG.getBitcast(ShiftVT, V);
1301 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1302 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1303 return DAG.getBitcast(VT, V);
1304}
1305
1306/// Determine whether a range fits a regular pattern of values.
1307/// This function accounts for the possibility of jumping over the End iterator.
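/// For example, fitsRegularPattern<int>(Mask.begin(), 2, Mask.end(), 0, 2)
/// checks that the even positions of the mask form <0, 2, 4, ...>, with undef
/// (-1) entries allowed anywhere.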
1308template <typename ValType>
1309static bool
1311 unsigned CheckStride,
1313 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1314 auto &I = Begin;
1315
1316 while (I != End) {
1317 if (*I != -1 && *I != ExpectedIndex)
1318 return false;
1319 ExpectedIndex += ExpectedIndexStride;
1320
1321 // Incrementing past End is undefined behaviour so we must increment one
1322 // step at a time and check for End at each step.
1323 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1324 ; // Empty loop body.
1325 }
1326 return true;
1327}
1328
1329/// Compute whether each element of a shuffle is zeroable.
1330///
1331/// A "zeroable" vector shuffle element is one which can be lowered to zero.
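/// For example, with V2 an all-zeros build vector and a v4i32 mask of
/// <0, 4, 1, 4>, KnownZero ends up with bits 1 and 3 set.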
1333 SDValue V2, APInt &KnownUndef,
1334 APInt &KnownZero) {
1335 int Size = Mask.size();
1336 KnownUndef = KnownZero = APInt::getZero(Size);
1337
1338 V1 = peekThroughBitcasts(V1);
1339 V2 = peekThroughBitcasts(V2);
1340
1341 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1342 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1343
1344 int VectorSizeInBits = V1.getValueSizeInBits();
1345 int ScalarSizeInBits = VectorSizeInBits / Size;
1346 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1347 (void)ScalarSizeInBits;
1348
1349 for (int i = 0; i < Size; ++i) {
1350 int M = Mask[i];
1351 if (M < 0) {
1352 KnownUndef.setBit(i);
1353 continue;
1354 }
1355 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1356 KnownZero.setBit(i);
1357 continue;
1358 }
1359 }
1360}
1361
1362/// Test whether a shuffle mask is equivalent within each sub-lane.
1363///
1364/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1365/// non-trivial to compute in the face of undef lanes. The representation is
1366/// suitable for use with existing 128-bit shuffles as entries from the second
1367/// vector have been remapped to [LaneSize, 2*LaneSize).
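/// For example, the 256-bit v8i32 mask <0, 8, 2, 10, 4, 12, 6, 14> repeats in
/// each 128-bit lane and yields RepeatedMask = <0, 4, 2, 6>.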
1368static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1369 ArrayRef<int> Mask,
1370 SmallVectorImpl<int> &RepeatedMask) {
1371 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1372 RepeatedMask.assign(LaneSize, -1);
1373 int Size = Mask.size();
1374 for (int i = 0; i < Size; ++i) {
1375 assert(Mask[i] == -1 || Mask[i] >= 0);
1376 if (Mask[i] < 0)
1377 continue;
1378 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1379 // This entry crosses lanes, so there is no way to model this shuffle.
1380 return false;
1381
1382 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1383 // Adjust second vector indices to start at LaneSize instead of Size.
1384 int LocalM =
1385 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1386 if (RepeatedMask[i % LaneSize] < 0)
1387 // This is the first non-undef entry in this slot of a 128-bit lane.
1388 RepeatedMask[i % LaneSize] = LocalM;
1389 else if (RepeatedMask[i % LaneSize] != LocalM)
1390 // Found a mismatch with the repeated mask.
1391 return false;
1392 }
1393 return true;
1394}
1395
1396/// Attempts to match vector shuffle as byte rotation.
1398 ArrayRef<int> Mask) {
1399
1400 SDValue Lo, Hi;
1401 SmallVector<int, 16> RepeatedMask;
1402
1403 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1404 return -1;
1405
1406 int NumElts = RepeatedMask.size();
1407 int Rotation = 0;
1408 int Scale = 16 / NumElts;
1409
1410 for (int i = 0; i < NumElts; ++i) {
1411 int M = RepeatedMask[i];
1412 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1413 "Unexpected mask index.");
1414 if (M < 0)
1415 continue;
1416
1417 // Determine where a rotated vector would have started.
1418 int StartIdx = i - (M % NumElts);
1419 if (StartIdx == 0)
1420 return -1;
1421
1422 // If we found the tail of a vector, the rotation must be the missing
1423 // front. If we found the head of a vector, the rotation is how much of
1424 // the head we found.
1425 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1426
1427 if (Rotation == 0)
1428 Rotation = CandidateRotation;
1429 else if (Rotation != CandidateRotation)
1430 return -1;
1431
1432 // Compute which value this mask is pointing at.
1433 SDValue MaskV = M < NumElts ? V1 : V2;
1434
1435 // Compute which of the two target values this index should be assigned
1436 // to. This reflects whether the high elements are remaining or the low
1437 // elements are remaining.
1438 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1439
1440 // Either set up this value if we've not encountered it before, or check
1441 // that it remains consistent.
1442 if (!TargetV)
1443 TargetV = MaskV;
1444 else if (TargetV != MaskV)
1445 return -1;
1446 }
1447
1448 // Check that we successfully analyzed the mask, and normalize the results.
1449 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1450 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1451 if (!Lo)
1452 Lo = Hi;
1453 else if (!Hi)
1454 Hi = Lo;
1455
1456 V1 = Lo;
1457 V2 = Hi;
1458
1459 return Rotation * Scale;
1460}
1461
1462/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1463///
1464/// For example:
1465/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1466/// <2 x i32> <i32 3, i32 0>
1467/// is lowered to:
1468/// (VBSRL_V $v1, $v1, 8)
1469/// (VBSLL_V $v0, $v0, 8)
1470/// (VOR_V $v0, $V0, $v1)
1471static SDValue
1473 SDValue V1, SDValue V2, SelectionDAG &DAG,
1474 const LoongArchSubtarget &Subtarget) {
1475
1476 SDValue Lo = V1, Hi = V2;
1477 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1478 if (ByteRotation <= 0)
1479 return SDValue();
1480
1481 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1482 Lo = DAG.getBitcast(ByteVT, Lo);
1483 Hi = DAG.getBitcast(ByteVT, Hi);
1484
1485 int LoByteShift = 16 - ByteRotation;
1486 int HiByteShift = ByteRotation;
1487 MVT GRLenVT = Subtarget.getGRLenVT();
1488
1489 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1490 DAG.getConstant(LoByteShift, DL, GRLenVT));
1491 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1492 DAG.getConstant(HiByteShift, DL, GRLenVT));
1493 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1494}
1495
1496/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1497///
1498/// For example:
1499/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1500/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1501/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1502/// is lowered to:
1503/// (VREPLI $v1, 0)
1504/// (VILVL $v0, $v1, $v0)
1506 ArrayRef<int> Mask, MVT VT,
1507 SDValue V1, SDValue V2,
1508 SelectionDAG &DAG,
1509 const APInt &Zeroable) {
1510 int Bits = VT.getSizeInBits();
1511 int EltBits = VT.getScalarSizeInBits();
1512 int NumElements = VT.getVectorNumElements();
1513
1514 if (Zeroable.isAllOnes())
1515 return DAG.getConstant(0, DL, VT);
1516
1517 // Define a helper function to check a particular ext-scale and lower to it if
1518 // valid.
1519 auto Lower = [&](int Scale) -> SDValue {
1520 SDValue InputV;
1521 bool AnyExt = true;
1522 int Offset = 0;
1523 for (int i = 0; i < NumElements; i++) {
1524 int M = Mask[i];
1525 if (M < 0)
1526 continue;
1527 if (i % Scale != 0) {
1528 // Each of the extended elements needs to be zeroable.
1529 if (!Zeroable[i])
1530 return SDValue();
1531
1532 AnyExt = false;
1533 continue;
1534 }
1535
1536 // The base elements need to be consecutive indices into the same input
1537 // vector.
1538 SDValue V = M < NumElements ? V1 : V2;
1539 M = M % NumElements;
1540 if (!InputV) {
1541 InputV = V;
1542 Offset = M - (i / Scale);
1543
1544 // Such offsets can't be handled.
1545 if (Offset % (NumElements / Scale))
1546 return SDValue();
1547 } else if (InputV != V)
1548 return SDValue();
1549
1550 if (M != (Offset + (i / Scale)))
1551 return SDValue(); // Non-consecutive strided elements.
1552 }
1553
1554 // If we fail to find an input, we have a zero-shuffle which should always
1555 // have already been handled.
1556 if (!InputV)
1557 return SDValue();
1558
1559 do {
1560 unsigned VilVLoHi = LoongArchISD::VILVL;
1561 if (Offset >= (NumElements / 2)) {
1562 VilVLoHi = LoongArchISD::VILVH;
1563 Offset -= (NumElements / 2);
1564 }
1565
1566 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1567 SDValue Ext =
1568 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1569 InputV = DAG.getBitcast(InputVT, InputV);
1570 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1571 Scale /= 2;
1572 EltBits *= 2;
1573 NumElements /= 2;
1574 } while (Scale > 1);
1575 return DAG.getBitcast(VT, InputV);
1576 };
1577
1578 // Each iteration, try extending the elements half as much, but into twice as
1579 // many elements.
1580 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1581 NumExtElements *= 2) {
1582 if (SDValue V = Lower(NumElements / NumExtElements))
1583 return V;
1584 }
1585 return SDValue();
1586}
1587
1588/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1589///
1590/// VREPLVEI performs vector broadcast based on an element specified by an
1591/// integer immediate, with its mask being similar to:
1592/// <x, x, x, ...>
1593/// where x is any valid index.
1594///
1595/// When undef's appear in the mask they are treated as if they were whatever
1596/// value is necessary in order to fit the above form.
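/// For example, the v4i32 mask <1, undef, 1, 1> broadcasts element 1 of the
/// first operand and would be lowered to (VREPLVEI_W $v0, $v0, 1).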
1597static SDValue
1599 SDValue V1, SDValue V2, SelectionDAG &DAG,
1600 const LoongArchSubtarget &Subtarget) {
1601 int SplatIndex = -1;
1602 for (const auto &M : Mask) {
1603 if (M != -1) {
1604 SplatIndex = M;
1605 break;
1606 }
1607 }
1608
1609 if (SplatIndex == -1)
1610 return DAG.getUNDEF(VT);
1611
1612 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1613 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1614 APInt Imm(64, SplatIndex);
1615 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1616 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1617 }
1618
1619 return SDValue();
1620}
1621
1622/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1623///
1624/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1625/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1626///
1627/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1628/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1629/// When undef's appear they are treated as if they were whatever value is
1630/// necessary in order to fit the above forms.
1631///
1632/// For example:
1633/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1634/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1635/// i32 7, i32 6, i32 5, i32 4>
1636/// is lowered to:
1637/// (VSHUF4I_H $v0, $v1, 27)
1638/// where the 27 comes from:
1639/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1640static SDValue
1642 SDValue V1, SDValue V2, SelectionDAG &DAG,
1643 const LoongArchSubtarget &Subtarget) {
1644
1645 unsigned SubVecSize = 4;
1646 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1647 SubVecSize = 2;
1648
1649 int SubMask[4] = {-1, -1, -1, -1};
1650 for (unsigned i = 0; i < SubVecSize; ++i) {
1651 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1652 int M = Mask[j];
1653
1654 // Convert from vector index to 4-element subvector index
1655 // If an index refers to an element outside of the subvector then give up
1656 if (M != -1) {
1657 M -= 4 * (j / SubVecSize);
1658 if (M < 0 || M >= 4)
1659 return SDValue();
1660 }
1661
1662 // If the mask has an undef, replace it with the current index.
1663 // Note that it might still be undef if the current index is also undef
1664 if (SubMask[i] == -1)
1665 SubMask[i] = M;
1666 // Check that non-undef values are the same as in the mask. If they
1667 // aren't then give up
1668 else if (M != -1 && M != SubMask[i])
1669 return SDValue();
1670 }
1671 }
1672
1673 // Calculate the immediate. Replace any remaining undefs with zero
1674 APInt Imm(64, 0);
1675 for (int i = SubVecSize - 1; i >= 0; --i) {
1676 int M = SubMask[i];
1677
1678 if (M == -1)
1679 M = 0;
1680
1681 Imm <<= 2;
1682 Imm |= M & 0x3;
1683 }
1684
1685 MVT GRLenVT = Subtarget.getGRLenVT();
1686
1687 // Return vshuf4i.d
1688 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1689 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1690 DAG.getConstant(Imm, DL, GRLenVT));
1691
1692 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1693 DAG.getConstant(Imm, DL, GRLenVT));
1694}
1695
1696/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1697///
1698/// VPACKEV interleaves the even elements from each vector.
1699///
1700/// It is possible to lower into VPACKEV when the mask consists of two of the
1701/// following forms interleaved:
1702/// <0, 2, 4, ...>
1703/// <n, n+2, n+4, ...>
1704/// where n is the number of elements in the vector.
1705/// For example:
1706/// <0, 0, 2, 2, 4, 4, ...>
1707/// <0, n, 2, n+2, 4, n+4, ...>
1708///
1709/// When undef's appear in the mask they are treated as if they were whatever
1710/// value is necessary in order to fit the above forms.
1712 MVT VT, SDValue V1, SDValue V2,
1713 SelectionDAG &DAG) {
1714
1715 const auto &Begin = Mask.begin();
1716 const auto &End = Mask.end();
1717 SDValue OriV1 = V1, OriV2 = V2;
1718
1719 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1720 V1 = OriV1;
1721 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1722 V1 = OriV2;
1723 else
1724 return SDValue();
1725
1726 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1727 V2 = OriV1;
1728 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1729 V2 = OriV2;
1730 else
1731 return SDValue();
1732
1733 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1734}
1735
1736/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1737///
1738/// VPACKOD interleaves the odd elements from each vector.
1739///
1740/// It is possible to lower into VPACKOD when the mask consists of two of the
1741/// following forms interleaved:
1742/// <1, 3, 5, ...>
1743/// <n+1, n+3, n+5, ...>
1744/// where n is the number of elements in the vector.
1745/// For example:
1746/// <1, 1, 3, 3, 5, 5, ...>
1747/// <1, n+1, 3, n+3, 5, n+5, ...>
1748///
1749/// When undef's appear in the mask they are treated as if they were whatever
1750/// value is necessary in order to fit the above forms.
1752 MVT VT, SDValue V1, SDValue V2,
1753 SelectionDAG &DAG) {
1754
1755 const auto &Begin = Mask.begin();
1756 const auto &End = Mask.end();
1757 SDValue OriV1 = V1, OriV2 = V2;
1758
1759 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1760 V1 = OriV1;
1761 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1762 V1 = OriV2;
1763 else
1764 return SDValue();
1765
1766 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1767 V2 = OriV1;
1768 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1769 V2 = OriV2;
1770 else
1771 return SDValue();
1772
1773 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1774}
1775
1776/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1777///
1778/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1779/// of each vector.
1780///
1781/// It is possible to lower into VILVH when the mask consists of two of the
1782/// following forms interleaved:
1783/// <x, x+1, x+2, ...>
1784/// <n+x, n+x+1, n+x+2, ...>
1785/// where n is the number of elements in the vector and x is half n.
1786/// For example:
1787/// <x, x, x+1, x+1, x+2, x+2, ...>
1788/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1789///
1790/// When undef's appear in the mask they are treated as if they were whatever
1791/// value is necessary in order to fit the above forms.
1793 MVT VT, SDValue V1, SDValue V2,
1794 SelectionDAG &DAG) {
1795
1796 const auto &Begin = Mask.begin();
1797 const auto &End = Mask.end();
1798 unsigned HalfSize = Mask.size() / 2;
1799 SDValue OriV1 = V1, OriV2 = V2;
1800
1801 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1802 V1 = OriV1;
1803 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1804 V1 = OriV2;
1805 else
1806 return SDValue();
1807
1808 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1809 V2 = OriV1;
1810 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1811 1))
1812 V2 = OriV2;
1813 else
1814 return SDValue();
1815
1816 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1817}
1818
1819/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1820///
1821/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1822/// of each vector.
1823///
1824/// It is possible to lower into VILVL when the mask consists of two of the
1825/// following forms interleaved:
1826/// <0, 1, 2, ...>
1827/// <n, n+1, n+2, ...>
1828/// where n is the number of elements in the vector.
1829/// For example:
1830/// <0, 0, 1, 1, 2, 2, ...>
1831/// <0, n, 1, n+1, 2, n+2, ...>
1832///
1833/// When undef's appear in the mask they are treated as if they were whatever
1834/// value is necessary in order to fit the above forms.
1836 MVT VT, SDValue V1, SDValue V2,
1837 SelectionDAG &DAG) {
1838
1839 const auto &Begin = Mask.begin();
1840 const auto &End = Mask.end();
1841 SDValue OriV1 = V1, OriV2 = V2;
1842
1843 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1844 V1 = OriV1;
1845 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1846 V1 = OriV2;
1847 else
1848 return SDValue();
1849
1850 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1851 V2 = OriV1;
1852 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1853 V2 = OriV2;
1854 else
1855 return SDValue();
1856
1857 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1858}
1859
1860/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1861///
1862/// VPICKEV copies the even elements of each vector into the result vector.
1863///
1864/// It is possible to lower into VPICKEV when the mask consists of two of the
1865/// following forms concatenated:
1866/// <0, 2, 4, ...>
1867/// <n, n+2, n+4, ...>
1868/// where n is the number of elements in the vector.
1869/// For example:
1870/// <0, 2, 4, ..., 0, 2, 4, ...>
1871/// <0, 2, 4, ..., n, n+2, n+4, ...>
1872///
1873/// When undef's appear in the mask they are treated as if they were whatever
1874/// value is necessary in order to fit the above forms.
1875 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1876 MVT VT, SDValue V1, SDValue V2,
1877 SelectionDAG &DAG) {
1878
1879 const auto &Begin = Mask.begin();
1880 const auto &Mid = Mask.begin() + Mask.size() / 2;
1881 const auto &End = Mask.end();
1882 SDValue OriV1 = V1, OriV2 = V2;
1883
1884 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1885 V1 = OriV1;
1886 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1887 V1 = OriV2;
1888 else
1889 return SDValue();
1890
1891 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1892 V2 = OriV1;
1893 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1894 V2 = OriV2;
1895
1896 else
1897 return SDValue();
1898
1899 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1900}
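// Editorial sketch (not part of the upstream source; the Example* name below
// is illustrative only): a concrete v8i16 mask accepted by the matcher above.
// The first half of the mask takes the even elements 0, 2, 4, 6 of V1 and the
// second half takes the even elements 8, 10, 12, 14 of V2, yielding
// (VPICKEV V2, V1).
#if 0 // illustrative only
constexpr int ExampleVPICKEVMaskV8I16[8] = {0, 2, 4, 6, 8, 10, 12, 14};
#endif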
1901
1902/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1903///
1904/// VPICKOD copies the odd elements of each vector into the result vector.
1905///
1906/// It is possible to lower into VPICKOD when the mask consists of two of the
1907/// following forms concatenated:
1908/// <1, 3, 5, ...>
1909/// <n+1, n+3, n+5, ...>
1910/// where n is the number of elements in the vector.
1911/// For example:
1912/// <1, 3, 5, ..., 1, 3, 5, ...>
1913/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1914///
1915/// When undef's appear in the mask they are treated as if they were whatever
1916/// value is necessary in order to fit the above forms.
1917 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1918 MVT VT, SDValue V1, SDValue V2,
1919 SelectionDAG &DAG) {
1920
1921 const auto &Begin = Mask.begin();
1922 const auto &Mid = Mask.begin() + Mask.size() / 2;
1923 const auto &End = Mask.end();
1924 SDValue OriV1 = V1, OriV2 = V2;
1925
1926 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1927 V1 = OriV1;
1928 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1929 V1 = OriV2;
1930 else
1931 return SDValue();
1932
1933 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1934 V2 = OriV1;
1935 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1936 V2 = OriV2;
1937 else
1938 return SDValue();
1939
1940 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1941}
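// Editorial sketch (not part of the upstream source; the Example* name below
// is illustrative only): a concrete v8i16 mask accepted by the matcher above.
// The first half of the mask takes the odd elements 1, 3, 5, 7 of V1 and the
// second half takes the odd elements 9, 11, 13, 15 of V2, yielding
// (VPICKOD V2, V1).
#if 0 // illustrative only
constexpr int ExampleVPICKODMaskV8I16[8] = {1, 3, 5, 7, 9, 11, 13, 15};
#endif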
1942
1943/// Lower VECTOR_SHUFFLE into VSHUF.
1944///
1945/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1946/// adding it as an operand to the resulting VSHUF.
1947 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1948 MVT VT, SDValue V1, SDValue V2,
1949 SelectionDAG &DAG) {
1950
1951 SmallVector<SDValue, 32> Ops;
1952 for (auto M : Mask)
1953 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1954
1955 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1956 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1957
1958 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1959 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1960 // VSHUF concatenates the vectors in a bitwise fashion:
1961 // <0b00, 0b01> + <0b10, 0b11> ->
1962 // 0b0100 + 0b1110 -> 0b01001110
1963 // <0b10, 0b11, 0b00, 0b01>
1964 // We must therefore swap the operands to get the correct result.
1965 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1966}
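// Editorial sketch (not part of the upstream source; the Example* name below
// is illustrative only, and such a simple mask would normally be caught by an
// earlier matcher): for a v4i32 shuffle with mask <0, 4, 1, 5>, the code above
// builds a v4i32 mask vector holding {0, 4, 1, 5} and emits VSHUF with the
// data operands swapped, i.e. (VSHUF MaskVec, V2, V1), so the indices address
// the concatenation in the order the instruction expects.
#if 0 // illustrative only
constexpr int ExampleVSHUFMaskV4I32[4] = {0, 4, 1, 5};
#endif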
1967
1968/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1969///
1970/// This routine breaks down the specific type of 128-bit shuffle and
1971/// dispatches to the lowering routines accordingly.
1972 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1973 SDValue V1, SDValue V2, SelectionDAG &DAG,
1974 const LoongArchSubtarget &Subtarget) {
1975 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1976 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1977 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1978 "Vector type is unsupported for lsx!");
1980 "Two operands have different types!");
1981 assert(VT.getVectorNumElements() == Mask.size() &&
1982 "Unexpected mask size for shuffle!");
1983 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1984
1985 APInt KnownUndef, KnownZero;
1986 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1987 APInt Zeroable = KnownUndef | KnownZero;
1988
1989 SDValue Result;
1990 // TODO: Add more comparison patterns.
1991 if (V2.isUndef()) {
1992 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
1993 Subtarget)))
1994 return Result;
1995 if ((Result =
1996 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1997 return Result;
1998
1999 // TODO: This commented-out assignment may be enabled in the future to
2000 // better match the pattern for instruction selection.
2001 /* V2 = V1; */
2002 }
2003
2004 // It is recommended not to change the order of these pattern comparisons;
2005 // the current order gives better performance.
2006 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2007 return Result;
2008 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2009 return Result;
2010 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2011 return Result;
2012 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2013 return Result;
2014 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2015 return Result;
2016 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2017 return Result;
2018 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2019 (Result =
2020 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2021 return Result;
2022 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2023 Zeroable)))
2024 return Result;
2025 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2026 Zeroable)))
2027 return Result;
2028 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2029 Subtarget)))
2030 return Result;
2031 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2032 return NewShuffle;
2033 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
2034 return Result;
2035 return SDValue();
2036}
2037
2038/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2039///
2040/// It is a XVREPLVEI when the mask is:
2041/// <x, x, x, ..., x+n, x+n, x+n, ...>
2042/// where x appears n times and n is half the length of the vector.
2043///
2044/// When undef's appear in the mask they are treated as if they were whatever
2045/// value is necessary in order to fit the above form.
2046static SDValue
2047 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2048 SDValue V1, SDValue V2, SelectionDAG &DAG,
2049 const LoongArchSubtarget &Subtarget) {
2050 int SplatIndex = -1;
2051 for (const auto &M : Mask) {
2052 if (M != -1) {
2053 SplatIndex = M;
2054 break;
2055 }
2056 }
2057
2058 if (SplatIndex == -1)
2059 return DAG.getUNDEF(VT);
2060
2061 const auto &Begin = Mask.begin();
2062 const auto &End = Mask.end();
2063 unsigned HalfSize = Mask.size() / 2;
2064
2065 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2066 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2067 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2068 0)) {
2069 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2070 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2071 }
2072
2073 return SDValue();
2074}
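// Editorial sketch (not part of the upstream source; the Example* name below
// is illustrative only): a concrete v8i32 mask accepted by the matcher above
// with SplatIndex = 2 and HalfSize = 4. The low half repeats index 2 and the
// high half repeats index 6 (= 2 + HalfSize), so the lowering emits
// (VREPLVEI V1, 2).
#if 0 // illustrative only
constexpr int ExampleXVREPLVEIMaskV8I32[8] = {2, 2, 2, 2, 6, 6, 6, 6};
#endif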
2075
2076/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2077static SDValue
2078 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2080 SDValue V1, SDValue V2, SelectionDAG &DAG,
2080 const LoongArchSubtarget &Subtarget) {
2081 // When the size is less than or equal to 4, lower-cost instructions may be
2082 // used.
2083 if (Mask.size() <= 4)
2084 return SDValue();
2085 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2086}
2087
2088/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2089 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2090 MVT VT, SDValue V1, SDValue V2,
2091 SelectionDAG &DAG) {
2092 // LoongArch LASX only has XVPERM_W.
2093 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2094 return SDValue();
2095
2096 unsigned NumElts = VT.getVectorNumElements();
2097 unsigned HalfSize = NumElts / 2;
2098 bool FrontLo = true, FrontHi = true;
2099 bool BackLo = true, BackHi = true;
2100
2101 auto inRange = [](int val, int low, int high) {
2102 return (val == -1) || (val >= low && val < high);
2103 };
2104
2105 for (unsigned i = 0; i < HalfSize; ++i) {
2106 int Fronti = Mask[i];
2107 int Backi = Mask[i + HalfSize];
2108
2109 FrontLo &= inRange(Fronti, 0, HalfSize);
2110 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2111 BackLo &= inRange(Backi, 0, HalfSize);
2112 BackHi &= inRange(Backi, HalfSize, NumElts);
2113 }
2114
2115 // If both the lower and upper 128-bit parts access only one half of the
2116 // vector (either lower or upper), avoid using xvperm.w. The latency of
2117 // xvperm.w (3 cycles) is higher than that of xvshuf (1) plus xvori (1).
2118 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2119 return SDValue();
2120
2121 SmallVector<SDValue, 8> Masks;
2122 for (unsigned i = 0; i < NumElts; ++i)
2123 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
2124 : DAG.getConstant(Mask[i], DL, MVT::i64));
2125 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2126
2127 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2128}
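// Editorial sketch (not part of the upstream source; the Example* name below
// is illustrative only): for v8i32, the reversal-style mask below mixes both
// halves within each 128-bit part, so none of FrontLo/FrontHi/BackLo/BackHi
// holds and the routine emits XVPERM. An in-half mask such as
// {0, 1, 2, 3, 4, 5, 6, 7} would instead bail out to cheaper lowerings.
#if 0 // illustrative only
constexpr int ExampleXVPERMMaskV8I32[8] = {7, 0, 6, 1, 5, 2, 4, 3};
#endif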
2129
2130/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2131 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2132 MVT VT, SDValue V1, SDValue V2,
2133 SelectionDAG &DAG) {
2134 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2135}
2136
2137/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2138 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2139 MVT VT, SDValue V1, SDValue V2,
2140 SelectionDAG &DAG) {
2141 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2142}
2143
2144/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2145 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2146 MVT VT, SDValue V1, SDValue V2,
2147 SelectionDAG &DAG) {
2148
2149 const auto &Begin = Mask.begin();
2150 const auto &End = Mask.end();
2151 unsigned HalfSize = Mask.size() / 2;
2152 unsigned LeftSize = HalfSize / 2;
2153 SDValue OriV1 = V1, OriV2 = V2;
2154
2155 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2156 1) &&
2157 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2158 V1 = OriV1;
2159 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2160 Mask.size() + HalfSize - LeftSize, 1) &&
2161 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2162 Mask.size() + HalfSize + LeftSize, 1))
2163 V1 = OriV2;
2164 else
2165 return SDValue();
2166
2167 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2168 1) &&
2169 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2170 1))
2171 V2 = OriV1;
2172 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2173 Mask.size() + HalfSize - LeftSize, 1) &&
2174 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2175 Mask.size() + HalfSize + LeftSize, 1))
2176 V2 = OriV2;
2177 else
2178 return SDValue();
2179
2180 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2181}
2182
2183/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2184 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2185 MVT VT, SDValue V1, SDValue V2,
2186 SelectionDAG &DAG) {
2187
2188 const auto &Begin = Mask.begin();
2189 const auto &End = Mask.end();
2190 unsigned HalfSize = Mask.size() / 2;
2191 SDValue OriV1 = V1, OriV2 = V2;
2192
2193 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2194 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2195 V1 = OriV1;
2196 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2197 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2198 Mask.size() + HalfSize, 1))
2199 V1 = OriV2;
2200 else
2201 return SDValue();
2202
2203 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2204 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2205 V2 = OriV1;
2206 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2207 1) &&
2208 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2209 Mask.size() + HalfSize, 1))
2210 V2 = OriV2;
2211 else
2212 return SDValue();
2213
2214 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2215}
2216
2217/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2218 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2219 MVT VT, SDValue V1, SDValue V2,
2220 SelectionDAG &DAG) {
2221
2222 const auto &Begin = Mask.begin();
2223 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2224 const auto &Mid = Mask.begin() + Mask.size() / 2;
2225 const auto &RightMid = Mask.end() - Mask.size() / 4;
2226 const auto &End = Mask.end();
2227 unsigned HalfSize = Mask.size() / 2;
2228 SDValue OriV1 = V1, OriV2 = V2;
2229
2230 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2231 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2232 V1 = OriV1;
2233 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2234 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2235 V1 = OriV2;
2236 else
2237 return SDValue();
2238
2239 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2240 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2241 V2 = OriV1;
2242 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2243 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2244 V2 = OriV2;
2245
2246 else
2247 return SDValue();
2248
2249 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2250}
2251
2252/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2253 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2254 MVT VT, SDValue V1, SDValue V2,
2255 SelectionDAG &DAG) {
2256
2257 const auto &Begin = Mask.begin();
2258 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2259 const auto &Mid = Mask.begin() + Mask.size() / 2;
2260 const auto &RightMid = Mask.end() - Mask.size() / 4;
2261 const auto &End = Mask.end();
2262 unsigned HalfSize = Mask.size() / 2;
2263 SDValue OriV1 = V1, OriV2 = V2;
2264
2265 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2266 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2267 V1 = OriV1;
2268 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2269 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2270 2))
2271 V1 = OriV2;
2272 else
2273 return SDValue();
2274
2275 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2276 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2277 V2 = OriV1;
2278 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2279 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2280 2))
2281 V2 = OriV2;
2282 else
2283 return SDValue();
2284
2285 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2286}
2287
2288/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2289 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2290 MVT VT, SDValue V1, SDValue V2,
2291 SelectionDAG &DAG) {
2292
2293 int MaskSize = Mask.size();
2294 int HalfSize = Mask.size() / 2;
2295 const auto &Begin = Mask.begin();
2296 const auto &Mid = Mask.begin() + HalfSize;
2297 const auto &End = Mask.end();
2298
2299 // VECTOR_SHUFFLE concatenates the vectors:
2300 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2301 // shuffling ->
2302 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2303 //
2304 // XVSHUF concatenates the vectors:
2305 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2306 // shuffling ->
2307 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2308 SmallVector<SDValue, 8> MaskAlloc;
2309 for (auto it = Begin; it < Mid; it++) {
2310 if (*it < 0) // UNDEF
2311 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2312 else if ((*it >= 0 && *it < HalfSize) ||
2313 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2314 int M = *it < HalfSize ? *it : *it - HalfSize;
2315 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2316 } else
2317 return SDValue();
2318 }
2319 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2320
2321 for (auto it = Mid; it < End; it++) {
2322 if (*it < 0) // UNDEF
2323 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2324 else if ((*it >= HalfSize && *it < MaskSize) ||
2325 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2326 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2327 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2328 } else
2329 return SDValue();
2330 }
2331 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2332
2333 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2334 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2335 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2336}
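// Editorial sketch (not part of the upstream source; the Example* names below
// are illustrative only, shown purely to trace the index rewrite): for a v8i32
// mask with MaskSize = 8 and HalfSize = 4, the first half of the mask must
// read from the low halves of the sources (indices 0..3 or 8..11) and the
// second half from the high halves (4..7 or 12..15); the rewrite maps them
// into the instruction's per-register index space.
#if 0 // illustrative only
constexpr int ExampleOriginalMaskV8I32[8] = {0, 8, 1, 9, 6, 14, 7, 15};
constexpr int ExampleRewrittenMaskV8I32[8] = {0, 4, 1, 5, 2, 6, 3, 7};
#endif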
2337
2338/// Shuffle vectors by lane to generate more optimized instructions.
2339/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2340///
2341/// Therefore, except for the following four cases, other cases are regarded
2342/// as cross-lane shuffles, where optimization is relatively limited.
2343///
2344/// - Shuffle high, low lanes of two input vectors
2345/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2346/// - Shuffle low, high lanes of two input vectors
2347/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2348/// - Shuffle low, low lanes of two input vectors
2349/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2350/// - Shuffle high, high lanes of two input vectors
2351/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2352///
2353/// The first case is the closest to LoongArch instructions and the other
2354/// cases need to be converted to it for processing.
2355///
2356/// This function may modify V1, V2 and Mask
2357 static void canonicalizeShuffleVectorByLane(
2358 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2359 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2360
2361 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2362
2363 int MaskSize = Mask.size();
2364 int HalfSize = Mask.size() / 2;
2365 MVT GRLenVT = Subtarget.getGRLenVT();
2366
2367 HalfMaskType preMask = None, postMask = None;
2368
2369 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2370 return M < 0 || (M >= 0 && M < HalfSize) ||
2371 (M >= MaskSize && M < MaskSize + HalfSize);
2372 }))
2373 preMask = HighLaneTy;
2374 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2375 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2376 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2377 }))
2378 preMask = LowLaneTy;
2379
2380 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2381 return M < 0 || (M >= 0 && M < HalfSize) ||
2382 (M >= MaskSize && M < MaskSize + HalfSize);
2383 }))
2384 postMask = HighLaneTy;
2385 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2386 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2387 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2388 }))
2389 postMask = LowLaneTy;
2390
2391 // The pre-half of mask is high lane type, and the post-half of mask
2392 // is low lane type, which is closest to the LoongArch instructions.
2393 //
2394 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2395 // to the lower 128 bits of the vector register, and the low lane of the mask
2396 // corresponds to the higher 128 bits of the vector register.
2397 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2398 return;
2399 }
2400 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2401 V1 = DAG.getBitcast(MVT::v4i64, V1);
2402 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2403 DAG.getConstant(0b01001110, DL, GRLenVT));
2404 V1 = DAG.getBitcast(VT, V1);
2405
2406 if (!V2.isUndef()) {
2407 V2 = DAG.getBitcast(MVT::v4i64, V2);
2408 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2409 DAG.getConstant(0b01001110, DL, GRLenVT));
2410 V2 = DAG.getBitcast(VT, V2);
2411 }
2412
2413 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2414 *it = *it < 0 ? *it : *it - HalfSize;
2415 }
2416 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2417 *it = *it < 0 ? *it : *it + HalfSize;
2418 }
2419 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2420 V1 = DAG.getBitcast(MVT::v4i64, V1);
2421 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2422 DAG.getConstant(0b11101110, DL, GRLenVT));
2423 V1 = DAG.getBitcast(VT, V1);
2424
2425 if (!V2.isUndef()) {
2426 V2 = DAG.getBitcast(MVT::v4i64, V2);
2427 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2428 DAG.getConstant(0b11101110, DL, GRLenVT));
2429 V2 = DAG.getBitcast(VT, V2);
2430 }
2431
2432 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2433 *it = *it < 0 ? *it : *it - HalfSize;
2434 }
2435 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2436 V1 = DAG.getBitcast(MVT::v4i64, V1);
2437 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2438 DAG.getConstant(0b01000100, DL, GRLenVT));
2439 V1 = DAG.getBitcast(VT, V1);
2440
2441 if (!V2.isUndef()) {
2442 V2 = DAG.getBitcast(MVT::v4i64, V2);
2443 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2444 DAG.getConstant(0b01000100, DL, GRLenVT));
2445 V2 = DAG.getBitcast(VT, V2);
2446 }
2447
2448 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2449 *it = *it < 0 ? *it : *it + HalfSize;
2450 }
2451 } else { // cross-lane
2452 return;
2453 }
2454}
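// Editorial sketch (not part of the upstream source; the Example* names below
// are illustrative only): for a v8i32 mask such as <4, 5, 6, 7, 0, 1, 2, 3>,
// the first half classifies as LowLaneTy and the second half as HighLaneTy,
// so both sources get their 128-bit halves swapped with XVPERMI
// (imm 0b01001110, i.e. 78) and the mask is rewritten into the canonical
// <0, 1, 2, 3, 4, 5, 6, 7> form that the later matchers expect.
#if 0 // illustrative only
constexpr int ExampleBeforeCanonicalization[8] = {4, 5, 6, 7, 0, 1, 2, 3};
constexpr int ExampleAfterCanonicalization[8] = {0, 1, 2, 3, 4, 5, 6, 7};
#endif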
2455
2456/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2457/// Only for 256-bit vector.
2458///
2459/// For example:
2460/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2461/// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2462/// is lowered to:
2463/// (XVPERMI $xr2, $xr0, 78)
2464/// (XVSHUF $xr1, $xr2, $xr0)
2465/// (XVORI $xr0, $xr1, 0)
2466 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2467 ArrayRef<int> Mask,
2468 MVT VT, SDValue V1,
2469 SDValue V2,
2470 SelectionDAG &DAG) {
2471 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2472 int Size = Mask.size();
2473 int LaneSize = Size / 2;
2474
2475 bool LaneCrossing[2] = {false, false};
2476 for (int i = 0; i < Size; ++i)
2477 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2478 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2479
2480 // Bail out unless at least one element crosses the 128-bit lanes.
2481 if (!LaneCrossing[0] && !LaneCrossing[1])
2482 return SDValue();
2483
2484 SmallVector<int> InLaneMask;
2485 InLaneMask.assign(Mask.begin(), Mask.end());
2486 for (int i = 0; i < Size; ++i) {
2487 int &M = InLaneMask[i];
2488 if (M < 0)
2489 continue;
2490 if (((M % Size) / LaneSize) != (i / LaneSize))
2491 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2492 }
2493
2494 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2495 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2496 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2497 Flipped = DAG.getBitcast(VT, Flipped);
2498 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2499}
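// Editorial sketch (not part of the upstream source; the Example* name below
// is illustrative only): for the v4i64 example in the comment above (mask
// <0, 3, 2, 0> with V2 undef), elements 3 and 0 cross lanes. The routine
// builds Flipped = shuffle(V1, undef, <2, 3, 0, 1>) and rewrites the
// cross-lane entries to read from Flipped, giving the in-lane mask
// <0, 5, 2, 6> for the final shuffle(V1, Flipped, ...).
#if 0 // illustrative only
constexpr int ExampleInLaneMaskV4I64[4] = {0, 5, 2, 6};
#endif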
2500
2501/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2502///
2503/// This routine breaks down the specific type of 256-bit shuffle and
2504/// dispatches to the lowering routines accordingly.
2505 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2506 SDValue V1, SDValue V2, SelectionDAG &DAG,
2507 const LoongArchSubtarget &Subtarget) {
2508 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2509 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2510 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2511 "Vector type is unsupported for lasx!");
2513 "Two operands have different types!");
2514 assert(VT.getVectorNumElements() == Mask.size() &&
2515 "Unexpected mask size for shuffle!");
2516 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2517 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2518
2519 // Canonicalize the shuffle by lane to reduce cross-lane accesses.
2520 SmallVector<int> NewMask(Mask);
2521 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2522
2523 APInt KnownUndef, KnownZero;
2524 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2525 APInt Zeroable = KnownUndef | KnownZero;
2526
2527 SDValue Result;
2528 // TODO: Add more comparison patterns.
2529 if (V2.isUndef()) {
2530 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2531 Subtarget)))
2532 return Result;
2533 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2534 Subtarget)))
2535 return Result;
2536 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2537 return Result;
2538 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2539 V1, V2, DAG)))
2540 return Result;
2541
2542 // TODO: This commented-out assignment may be enabled in the future to
2543 // better match the pattern for instruction selection.
2544 /* V2 = V1; */
2545 }
2546
2547 // It is recommended not to change the order of these pattern comparisons;
2548 // the current order gives better performance.
2549 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2550 return Result;
2551 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2552 return Result;
2553 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2554 return Result;
2555 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2556 return Result;
2557 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2558 return Result;
2559 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2560 return Result;
2561 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2562 Subtarget, Zeroable)))
2563 return Result;
2564 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2565 Subtarget)))
2566 return Result;
2567 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2568 return NewShuffle;
2569 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2570 return Result;
2571
2572 return SDValue();
2573}
2574
2575SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2576 SelectionDAG &DAG) const {
2577 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2578 ArrayRef<int> OrigMask = SVOp->getMask();
2579 SDValue V1 = Op.getOperand(0);
2580 SDValue V2 = Op.getOperand(1);
2581 MVT VT = Op.getSimpleValueType();
2582 int NumElements = VT.getVectorNumElements();
2583 SDLoc DL(Op);
2584
2585 bool V1IsUndef = V1.isUndef();
2586 bool V2IsUndef = V2.isUndef();
2587 if (V1IsUndef && V2IsUndef)
2588 return DAG.getUNDEF(VT);
2589
2590 // When we create a shuffle node we put the UNDEF node to second operand,
2591 // but in some cases the first operand may be transformed to UNDEF.
2592 // In this case we should just commute the node.
2593 if (V1IsUndef)
2594 return DAG.getCommutedVectorShuffle(*SVOp);
2595
2596 // Check for non-undef masks pointing at an undef vector and make the masks
2597 // undef as well. This makes it easier to match the shuffle based solely on
2598 // the mask.
2599 if (V2IsUndef &&
2600 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2601 SmallVector<int, 8> NewMask(OrigMask);
2602 for (int &M : NewMask)
2603 if (M >= NumElements)
2604 M = -1;
2605 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2606 }
2607
2608 // Check for illegal shuffle mask element index values.
2609 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2610 (void)MaskUpperLimit;
2611 assert(llvm::all_of(OrigMask,
2612 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2613 "Out of bounds shuffle index");
2614
2615 // For each vector width, delegate to a specialized lowering routine.
2616 if (VT.is128BitVector())
2617 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2618
2619 if (VT.is256BitVector())
2620 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2621
2622 return SDValue();
2623}
2624
2625SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2626 SelectionDAG &DAG) const {
2627 // Custom lower to ensure the libcall return is passed in an FPR on hard
2628 // float ABIs.
2629 SDLoc DL(Op);
2630 MakeLibCallOptions CallOptions;
2631 SDValue Op0 = Op.getOperand(0);
2632 SDValue Chain = SDValue();
2633 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2634 SDValue Res;
2635 std::tie(Res, Chain) =
2636 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2637 if (Subtarget.is64Bit())
2638 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2639 return DAG.getBitcast(MVT::i32, Res);
2640}
2641
2642SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2643 SelectionDAG &DAG) const {
2644 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2645 // float ABIs.
2646 SDLoc DL(Op);
2647 MakeLibCallOptions CallOptions;
2648 SDValue Op0 = Op.getOperand(0);
2649 SDValue Chain = SDValue();
2650 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2651 DL, MVT::f32, Op0)
2652 : DAG.getBitcast(MVT::f32, Op0);
2653 SDValue Res;
2654 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2655 CallOptions, DL, Chain);
2656 return Res;
2657}
2658
2659SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2660 SelectionDAG &DAG) const {
2661 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2662 SDLoc DL(Op);
2663 MakeLibCallOptions CallOptions;
2664 RTLIB::Libcall LC =
2665 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2666 SDValue Res =
2667 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2668 if (Subtarget.is64Bit())
2669 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2670 return DAG.getBitcast(MVT::i32, Res);
2671}
2672
2673SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2674 SelectionDAG &DAG) const {
2675 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2676 MVT VT = Op.getSimpleValueType();
2677 SDLoc DL(Op);
2678 Op = DAG.getNode(
2679 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2680 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2681 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2682 DL, MVT::f32, Op)
2683 : DAG.getBitcast(MVT::f32, Op);
2684 if (VT != MVT::f32)
2685 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2686 return Res;
2687}
2688
2689// Lower BUILD_VECTOR as broadcast load (if possible).
2690// For example:
2691// %a = load i8, ptr %ptr
2692// %b = build_vector %a, %a, %a, %a
2693// is lowered to:
2694// (VLDREPL_B $a0, 0)
2695 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2696 const SDLoc &DL,
2697 SelectionDAG &DAG) {
2698 MVT VT = BVOp->getSimpleValueType(0);
2699 int NumOps = BVOp->getNumOperands();
2700
2701 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2702 "Unsupported vector type for broadcast.");
2703
2704 SDValue IdentitySrc;
2705 bool IsIdentity = true;
2706
2707 for (int i = 0; i != NumOps; i++) {
2708 SDValue Op = BVOp->getOperand(i);
2709 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2710 IsIdentity = false;
2711 break;
2712 }
2713 IdentitySrc = BVOp->getOperand(0);
2714 }
2715
2716 // Make sure that this load is valid and has only one user.
2717 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2718 return SDValue();
2719
2720 auto *LN = cast<LoadSDNode>(IdentitySrc);
2721 auto ExtType = LN->getExtensionType();
2722
2723 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2724 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2725 SDVTList Tys =
2726 LN->isIndexed()
2727 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2728 : DAG.getVTList(VT, MVT::Other);
2729 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2730 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2731 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2732 return BCast;
2733 }
2734 return SDValue();
2735}
2736
2737// Sequentially insert elements from Ops into Vector, from low to high indices.
2738// Note: Ops can have fewer elements than Vector.
2739 static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
2740 const LoongArchSubtarget &Subtarget, SDValue &Vector,
2741 EVT ResTy) {
2742 assert(Ops.size() <= ResTy.getVectorNumElements());
2743
2744 SDValue Op0 = Ops[0];
2745 if (!Op0.isUndef())
2746 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2747 for (unsigned i = 1; i < Ops.size(); ++i) {
2748 SDValue Opi = Ops[i];
2749 if (Opi.isUndef())
2750 continue;
2751 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2752 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2753 }
2754}
2755
2756// Build a ResTy subvector from Node, taking NumElts elements starting at index
2757// 'first'.
2758 static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
2759 SelectionDAG &DAG, SDLoc DL,
2760 const LoongArchSubtarget &Subtarget,
2761 EVT ResTy, unsigned first) {
2762 unsigned NumElts = ResTy.getVectorNumElements();
2763
2764 assert(first >= 0 &&
2765 first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
2766
2767 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
2768 Node->op_begin() + first + NumElts);
2769 SDValue Vector = DAG.getUNDEF(ResTy);
2770 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
2771 return Vector;
2772}
2773
2774SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2775 SelectionDAG &DAG) const {
2776 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2777 MVT VT = Node->getSimpleValueType(0);
2778 EVT ResTy = Op->getValueType(0);
2779 unsigned NumElts = ResTy.getVectorNumElements();
2780 SDLoc DL(Op);
2781 APInt SplatValue, SplatUndef;
2782 unsigned SplatBitSize;
2783 bool HasAnyUndefs;
2784 bool IsConstant = false;
2785 bool UseSameConstant = true;
2786 SDValue ConstantValue;
2787 bool Is128Vec = ResTy.is128BitVector();
2788 bool Is256Vec = ResTy.is256BitVector();
2789
2790 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2791 (!Subtarget.hasExtLASX() || !Is256Vec))
2792 return SDValue();
2793
2794 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2795 return Result;
2796
2797 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2798 /*MinSplatBits=*/8) &&
2799 SplatBitSize <= 64) {
2800 // We can only cope with 8, 16, 32, or 64-bit elements.
2801 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2802 SplatBitSize != 64)
2803 return SDValue();
2804
2805 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2806 // We can only handle 64-bit elements that are within
2807 // the signed 10-bit range on 32-bit targets.
2808 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2809 if (!SplatValue.isSignedIntN(10))
2810 return SDValue();
2811 if ((Is128Vec && ResTy == MVT::v4i32) ||
2812 (Is256Vec && ResTy == MVT::v8i32))
2813 return Op;
2814 }
2815
2816 EVT ViaVecTy;
2817
2818 switch (SplatBitSize) {
2819 default:
2820 return SDValue();
2821 case 8:
2822 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2823 break;
2824 case 16:
2825 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2826 break;
2827 case 32:
2828 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2829 break;
2830 case 64:
2831 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2832 break;
2833 }
2834
2835 // SelectionDAG::getConstant will promote SplatValue appropriately.
2836 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2837
2838 // Bitcast to the type we originally wanted.
2839 if (ViaVecTy != ResTy)
2840 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2841
2842 return Result;
2843 }
2844
2845 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2846 return Op;
2847
2848 for (unsigned i = 0; i < NumElts; ++i) {
2849 SDValue Opi = Node->getOperand(i);
2850 if (isIntOrFPConstant(Opi)) {
2851 IsConstant = true;
2852 if (!ConstantValue.getNode())
2853 ConstantValue = Opi;
2854 else if (ConstantValue != Opi)
2855 UseSameConstant = false;
2856 }
2857 }
2858
2859 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2860 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2861 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2862 for (unsigned i = 0; i < NumElts; ++i) {
2863 SDValue Opi = Node->getOperand(i);
2864 if (!isIntOrFPConstant(Opi))
2865 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2866 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2867 }
2868 return Result;
2869 }
2870
2871 if (!IsConstant) {
2872 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
2873 // the sub-sequence of the vector and then broadcast the sub-sequence.
2874 //
2875 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
2876 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
2877 // generates worse code in some cases. This could be further optimized
2878 // with more consideration.
2879 SmallVector<SDValue> Sequence;
2880 BitVector UndefElements;
2881 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
2882 UndefElements.count() == 0) {
2883 // Using LSX instructions to fill the sub-sequence of 256-bits vector,
2884 // because the high part can be simply treated as undef.
2885 SDValue Vector = DAG.getUNDEF(ResTy);
2886 EVT FillTy = Is256Vec
2887 ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
2888 : ResTy;
2889 SDValue FillVec =
2890 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
2891
2892 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
2893
2894 unsigned SeqLen = Sequence.size();
2895 unsigned SplatLen = NumElts / SeqLen;
2896 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
2897 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
2898
2899 // If size of the sub-sequence is half of a 256-bits vector, bitcast the
2900 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
2901 if (SplatEltTy == MVT::i128)
2902 SplatTy = MVT::v4i64;
2903
2904 SDValue SplatVec;
2905 SDValue SrcVec = DAG.getBitcast(
2906 SplatTy,
2907 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
2908 if (Is256Vec) {
2909 SplatVec =
2910 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
2911 : LoongArchISD::XVREPLVE0,
2912 DL, SplatTy, SrcVec);
2913 } else {
2914 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
2915 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
2916 }
2917
2918 return DAG.getBitcast(ResTy, SplatVec);
2919 }
2920
2921 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
2922 // going through memory is much slower.
2923 //
2924 // For 256-bit vectors, normally split into two halves and concatenate.
2925 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
2926 // one non-undef element, skip splitting to avoid a worse result.
2927 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
2928 ResTy == MVT::v4f64) {
2929 unsigned NonUndefCount = 0;
2930 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
2931 if (!Node->getOperand(i).isUndef()) {
2932 ++NonUndefCount;
2933 if (NonUndefCount > 1)
2934 break;
2935 }
2936 }
2937 if (NonUndefCount == 1)
2938 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
2939 }
2940
2941 EVT VecTy =
2942 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
2943 SDValue Vector =
2944 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
2945
2946 if (Is128Vec)
2947 return Vector;
2948
2949 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
2950 VecTy, NumElts / 2);
2951
2952 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
2953 }
2954
2955 return SDValue();
2956}
2957
2958SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2959 SelectionDAG &DAG) const {
2960 SDLoc DL(Op);
2961 MVT ResVT = Op.getSimpleValueType();
2962 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2963
2964 unsigned NumOperands = Op.getNumOperands();
2965 unsigned NumFreezeUndef = 0;
2966 unsigned NumZero = 0;
2967 unsigned NumNonZero = 0;
2968 unsigned NonZeros = 0;
2969 SmallSet<SDValue, 4> Undefs;
2970 for (unsigned i = 0; i != NumOperands; ++i) {
2971 SDValue SubVec = Op.getOperand(i);
2972 if (SubVec.isUndef())
2973 continue;
2974 if (ISD::isFreezeUndef(SubVec.getNode())) {
2975 // If the freeze(undef) has multiple uses then we must fold to zero.
2976 if (SubVec.hasOneUse()) {
2977 ++NumFreezeUndef;
2978 } else {
2979 ++NumZero;
2980 Undefs.insert(SubVec);
2981 }
2982 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2983 ++NumZero;
2984 else {
2985 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2986 NonZeros |= 1 << i;
2987 ++NumNonZero;
2988 }
2989 }
2990
2991 // If we have more than 2 non-zeros, build each half separately.
2992 if (NumNonZero > 2) {
2993 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2994 ArrayRef<SDUse> Ops = Op->ops();
2995 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2996 Ops.slice(0, NumOperands / 2));
2997 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2998 Ops.slice(NumOperands / 2));
2999 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3000 }
3001
3002 // Otherwise, build it up through insert_subvectors.
3003 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3004 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3005 : DAG.getUNDEF(ResVT));
3006
3007 // Replace Undef operands with ZeroVector.
3008 for (SDValue U : Undefs)
3009 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3010
3011 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3012 unsigned NumSubElems = SubVT.getVectorNumElements();
3013 for (unsigned i = 0; i != NumOperands; ++i) {
3014 if ((NonZeros & (1 << i)) == 0)
3015 continue;
3016
3017 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3018 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3019 }
3020
3021 return Vec;
3022}
3023
3024SDValue
3025LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3026 SelectionDAG &DAG) const {
3027 MVT EltVT = Op.getSimpleValueType();
3028 SDValue Vec = Op->getOperand(0);
3029 EVT VecTy = Vec->getValueType(0);
3030 SDValue Idx = Op->getOperand(1);
3031 SDLoc DL(Op);
3032 MVT GRLenVT = Subtarget.getGRLenVT();
3033
3034 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3035
3036 if (isa<ConstantSDNode>(Idx))
3037 return Op;
3038
3039 switch (VecTy.getSimpleVT().SimpleTy) {
3040 default:
3041 llvm_unreachable("Unexpected type");
3042 case MVT::v32i8:
3043 case MVT::v16i16:
3044 case MVT::v4i64:
3045 case MVT::v4f64: {
3046 // Extract the high half subvector and place it in the low half of a new
3047 // vector. It doesn't matter what the high half of the new vector is.
3048 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3049 SDValue VecHi =
3050 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3051 SDValue TmpVec =
3052 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3053 VecHi, DAG.getConstant(0, DL, GRLenVT));
3054
3055 // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
3056 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3057 // desired element.
3058 SDValue IdxCp =
3059 Subtarget.is64Bit()
3060 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3061 : DAG.getBitcast(MVT::f32, Idx);
3062 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3063 SDValue MaskVec =
3064 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3065 SDValue ResVec =
3066 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3067
3068 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3069 DAG.getConstant(0, DL, GRLenVT));
3070 }
3071 case MVT::v8i32:
3072 case MVT::v8f32: {
3073 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3074 SDValue SplatValue =
3075 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3076
3077 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3078 DAG.getConstant(0, DL, GRLenVT));
3079 }
3080 }
3081}
3082
3083SDValue
3084LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3085 SelectionDAG &DAG) const {
3086 MVT VT = Op.getSimpleValueType();
3087 MVT EltVT = VT.getVectorElementType();
3088 unsigned NumElts = VT.getVectorNumElements();
3089 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3090 SDLoc DL(Op);
3091 SDValue Op0 = Op.getOperand(0);
3092 SDValue Op1 = Op.getOperand(1);
3093 SDValue Op2 = Op.getOperand(2);
3094
3095 if (isa<ConstantSDNode>(Op2))
3096 return Op;
3097
3098 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3099 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3100
3101 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3102 return SDValue();
3103
3104 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3105 SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3106
3107 SmallVector<SDValue, 32> RawIndices;
3108 for (unsigned i = 0; i < NumElts; ++i)
3109 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3110 SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3111
3112 // insert vec, elt, idx
3113 // =>
3114 // select (splatidx == {0,1,2...}) ? splatelt : vec
3115 SDValue SelectCC =
3116 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3117 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3118}
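// Editorial sketch (not part of the upstream source): a scalar model of the
// select-based insert above for a v4i32 vector with a non-constant index. The
// lowering splats both the element and the index, compares the splatted index
// against the constant vector <0, 1, 2, 3> with SETEQ, and uses the result as
// a VSELECT mask so only the selected lane takes the new element.
#if 0 // illustrative only
for (unsigned Lane = 0; Lane < 4; ++Lane)
  Result[Lane] = (Lane == Idx) ? Elt : Vec[Lane];
#endif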
3119
3120SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3121 SelectionDAG &DAG) const {
3122 SDLoc DL(Op);
3123 SyncScope::ID FenceSSID =
3124 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3125
3126 // singlethread fences only synchronize with signal handlers on the same
3127 // thread and thus only need to preserve instruction order, not actually
3128 // enforce memory ordering.
3129 if (FenceSSID == SyncScope::SingleThread)
3130 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3131 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3132
3133 return Op;
3134}
3135
3136SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3137 SelectionDAG &DAG) const {
3138
3139 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3140 DAG.getContext()->emitError(
3141 "On LA64, only 64-bit registers can be written.");
3142 return Op.getOperand(0);
3143 }
3144
3145 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3146 DAG.getContext()->emitError(
3147 "On LA32, only 32-bit registers can be written.");
3148 return Op.getOperand(0);
3149 }
3150
3151 return Op;
3152}
3153
3154SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3155 SelectionDAG &DAG) const {
3156 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3157 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3158 "be a constant integer");
3159 return SDValue();
3160 }
3161
3162 MachineFunction &MF = DAG.getMachineFunction();
3163 MF.getFrameInfo().setFrameAddressIsTaken(true);
3164 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3165 EVT VT = Op.getValueType();
3166 SDLoc DL(Op);
3167 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3168 unsigned Depth = Op.getConstantOperandVal(0);
3169 int GRLenInBytes = Subtarget.getGRLen() / 8;
3170
3171 while (Depth--) {
3172 int Offset = -(GRLenInBytes * 2);
3173 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3174 DAG.getSignedConstant(Offset, DL, VT));
3175 FrameAddr =
3176 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3177 }
3178 return FrameAddr;
3179}
3180
3181SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3182 SelectionDAG &DAG) const {
3183 // Currently only support lowering return address for current frame.
3184 if (Op.getConstantOperandVal(0) != 0) {
3185 DAG.getContext()->emitError(
3186 "return address can only be determined for the current frame");
3187 return SDValue();
3188 }
3189
3190 MachineFunction &MF = DAG.getMachineFunction();
3191 MF.getFrameInfo().setReturnAddressIsTaken(true);
3192 MVT GRLenVT = Subtarget.getGRLenVT();
3193
3194 // Return the value of the return address register, marking it an implicit
3195 // live-in.
3196 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3197 getRegClassFor(GRLenVT));
3198 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3199}
3200
3201SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3202 SelectionDAG &DAG) const {
3203 MachineFunction &MF = DAG.getMachineFunction();
3204 auto Size = Subtarget.getGRLen() / 8;
3205 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3206 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3207}
3208
3209SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3210 SelectionDAG &DAG) const {
3211 MachineFunction &MF = DAG.getMachineFunction();
3212 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3213
3214 SDLoc DL(Op);
3215 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3217
3216 getPointerTy(MF.getDataLayout()));
3217
3218 // vastart just stores the address of the VarArgsFrameIndex slot into the
3219 // memory location argument.
3220 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3221 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3222 MachinePointerInfo(SV));
3223}
3224
3225SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3226 SelectionDAG &DAG) const {
3227 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3228 !Subtarget.hasBasicD() && "unexpected target features");
3229
3230 SDLoc DL(Op);
3231 SDValue Op0 = Op.getOperand(0);
3232 if (Op0->getOpcode() == ISD::AND) {
3233 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3234 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3235 return Op;
3236 }
3237
3238 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3239 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3240 Op0.getConstantOperandVal(2) == UINT64_C(0))
3241 return Op;
3242
3243 if (Op0.getOpcode() == ISD::AssertZext &&
3244 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3245 return Op;
3246
3247 EVT OpVT = Op0.getValueType();
3248 EVT RetVT = Op.getValueType();
3249 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3250 MakeLibCallOptions CallOptions;
3251 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3252 SDValue Chain = SDValue();
3253 SDValue Result;
3254 std::tie(Result, Chain) =
3255 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3256 return Result;
3257}
3258
3259SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3260 SelectionDAG &DAG) const {
3261 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3262 !Subtarget.hasBasicD() && "unexpected target features");
3263
3264 SDLoc DL(Op);
3265 SDValue Op0 = Op.getOperand(0);
3266
3267 if ((Op0.getOpcode() == ISD::AssertSext ||
3268 Op0.getOpcode() == ISD::AssertZext) &&
3269 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3270 return Op;
3271
3272 EVT OpVT = Op0.getValueType();
3273 EVT RetVT = Op.getValueType();
3274 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3275 MakeLibCallOptions CallOptions;
3276 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3277 SDValue Chain = SDValue();
3278 SDValue Result;
3279 std::tie(Result, Chain) =
3280 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3281 return Result;
3282}
3283
3284SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3285 SelectionDAG &DAG) const {
3286
3287 SDLoc DL(Op);
3288 EVT VT = Op.getValueType();
3289 SDValue Op0 = Op.getOperand(0);
3290 EVT Op0VT = Op0.getValueType();
3291
3292 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3293 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3294 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3295 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3296 }
3297 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3298 SDValue Lo, Hi;
3299 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3300 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3301 }
3302 return Op;
3303}
3304
3305SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3306 SelectionDAG &DAG) const {
3307
3308 SDLoc DL(Op);
3309 SDValue Op0 = Op.getOperand(0);
3310
3311 if (Op0.getValueType() == MVT::f16)
3312 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3313
3314 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3315 !Subtarget.hasBasicD()) {
3316 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3317 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3318 }
3319
3320 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3321 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3322 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3323}
3324
3325 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3326 SelectionDAG &DAG, unsigned Flags) {
3327 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3328}
3329
3330 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3331 SelectionDAG &DAG, unsigned Flags) {
3332 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3333 Flags);
3334}
3335
3336 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3337 SelectionDAG &DAG, unsigned Flags) {
3338 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3339 N->getOffset(), Flags);
3340}
3341
3342 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3343 SelectionDAG &DAG, unsigned Flags) {
3344 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3345}
3346
3347template <class NodeTy>
3348SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3349 CodeModel::Model M,
3350 bool IsLocal) const {
3351 SDLoc DL(N);
3352 EVT Ty = getPointerTy(DAG.getDataLayout());
3353 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3354 SDValue Load;
3355
3356 switch (M) {
3357 default:
3358 report_fatal_error("Unsupported code model");
3359
3360 case CodeModel::Large: {
3361 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3362
3363 // This is not actually used, but is necessary for successfully matching
3364 // the PseudoLA_*_LARGE nodes.
3365 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3366 if (IsLocal) {
3367 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3368 // eventually becomes the desired 5-insn code sequence.
3369 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3370 Tmp, Addr),
3371 0);
3372 } else {
3373 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3374 // eventually becomes the desired 5-insn code sequence.
3375 Load = SDValue(
3376 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3377 0);
3378 }
3379 break;
3380 }
3381
3382 case CodeModel::Small:
3383 case CodeModel::Medium:
3384 if (IsLocal) {
3385 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3386 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3387 Load = SDValue(
3388 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3389 } else {
3390 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3391 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3392 Load =
3393 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3394 }
3395 }
3396
3397 if (!IsLocal) {
3398 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3399 MachineFunction &MF = DAG.getMachineFunction();
3400 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3401 MachinePointerInfo::getGOT(MF),
3402 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3403 MachineMemOperand::MOInvariant,
3404 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3405 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3406 }
3407
3408 return Load;
3409}
3410
3411SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3412 SelectionDAG &DAG) const {
3413 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3414 DAG.getTarget().getCodeModel());
3415}
3416
3417SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3418 SelectionDAG &DAG) const {
3419 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3420 DAG.getTarget().getCodeModel());
3421}
3422
3423SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3424 SelectionDAG &DAG) const {
3425 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3426 DAG.getTarget().getCodeModel());
3427}
3428
3429SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3430 SelectionDAG &DAG) const {
3431 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3432 assert(N->getOffset() == 0 && "unexpected offset in global node");
3433 auto CM = DAG.getTarget().getCodeModel();
3434 const GlobalValue *GV = N->getGlobal();
3435
3436 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3437 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3438 CM = *GCM;
3439 }
3440
3441 return getAddr(N, DAG, CM, GV->isDSOLocal());
3442}
3443
3444SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3445 SelectionDAG &DAG,
3446 unsigned Opc, bool UseGOT,
3447 bool Large) const {
3448 SDLoc DL(N);
3449 EVT Ty = getPointerTy(DAG.getDataLayout());
3450 MVT GRLenVT = Subtarget.getGRLenVT();
3451
3452 // This is not actually used, but is necessary for successfully matching the
3453 // PseudoLA_*_LARGE nodes.
3454 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3455 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3456
3457 // Only IE needs an extra argument for large code model.
3458 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3459 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3460 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3461
3462 // If it is LE for normal/medium code model, the add tp operation will occur
3463 // during the pseudo-instruction expansion.
3464 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3465 return Offset;
3466
3467 if (UseGOT) {
3468 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3469 MachineFunction &MF = DAG.getMachineFunction();
3470 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3471        MachinePointerInfo::getGOT(MF),
3472        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3473            MachineMemOperand::MOInvariant,
3474        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3475 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3476 }
3477
3478 // Add the thread pointer.
3479 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3480 DAG.getRegister(LoongArch::R2, GRLenVT));
3481}
3482
3483SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3484 SelectionDAG &DAG,
3485 unsigned Opc,
3486 bool Large) const {
3487 SDLoc DL(N);
3488 EVT Ty = getPointerTy(DAG.getDataLayout());
3489 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3490
3491 // This is not actually used, but is necessary for successfully matching the
3492 // PseudoLA_*_LARGE nodes.
3493 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3494
3495 // Use a PC-relative addressing mode to access the dynamic GOT address.
3496 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3497 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3498 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3499
3500 // Prepare argument list to generate call.
3501  ArgListTy Args;
3502  Args.emplace_back(Load, CallTy);
3503
3504 // Setup call to __tls_get_addr.
3505 TargetLowering::CallLoweringInfo CLI(DAG);
3506 CLI.setDebugLoc(DL)
3507 .setChain(DAG.getEntryNode())
3508 .setLibCallee(CallingConv::C, CallTy,
3509 DAG.getExternalSymbol("__tls_get_addr", Ty),
3510 std::move(Args));
3511
3512 return LowerCallTo(CLI).first;
3513}
3514
3515SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3516 SelectionDAG &DAG, unsigned Opc,
3517 bool Large) const {
3518 SDLoc DL(N);
3519 EVT Ty = getPointerTy(DAG.getDataLayout());
3520 const GlobalValue *GV = N->getGlobal();
3521
3522 // This is not actually used, but is necessary for successfully matching the
3523 // PseudoLA_*_LARGE nodes.
3524 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3525
3526 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3527 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3528 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3529 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3530 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3531}
3532
3533SDValue
3534LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3535 SelectionDAG &DAG) const {
3536  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3537      CallingConv::GHC)
3538    report_fatal_error("In GHC calling convention TLS is not supported");
3539
3540 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3541 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3542
3543 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3544 assert(N->getOffset() == 0 && "unexpected offset in global node");
3545
3546 if (DAG.getTarget().useEmulatedTLS())
3547 reportFatalUsageError("the emulated TLS is prohibited");
3548
3549 bool IsDesc = DAG.getTarget().useTLSDESC();
3550
3551 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3552  case TLSModel::GeneralDynamic:
3553    // In this model, application code calls the dynamic linker function
3554 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3555 // runtime.
3556 if (!IsDesc)
3557 return getDynamicTLSAddr(N, DAG,
3558 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3559 : LoongArch::PseudoLA_TLS_GD,
3560 Large);
3561 break;
3562  case TLSModel::LocalDynamic:
3563    // Same as GeneralDynamic, except for assembly modifiers and relocation
3564 // records.
3565 if (!IsDesc)
3566 return getDynamicTLSAddr(N, DAG,
3567 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3568 : LoongArch::PseudoLA_TLS_LD,
3569 Large);
3570 break;
3571  case TLSModel::InitialExec:
3572    // This model uses the GOT to resolve TLS offsets.
3573 return getStaticTLSAddr(N, DAG,
3574 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3575 : LoongArch::PseudoLA_TLS_IE,
3576 /*UseGOT=*/true, Large);
3577  case TLSModel::LocalExec:
3578    // This model is used when statically linking, as the TLS offsets are
3579    // resolved during program linking.
3580 //
3581 // This node doesn't need an extra argument for the large code model.
3582 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3583 /*UseGOT=*/false, Large);
3584 }
3585
3586 return getTLSDescAddr(N, DAG,
3587 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3588 : LoongArch::PseudoLA_TLS_DESC,
3589 Large);
3590}
3591
3592template <unsigned N>
3593static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3594                                    SelectionDAG &DAG, bool IsSigned = false) {
3595 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3596 // Check the ImmArg.
3597 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3598 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3599 DAG.getContext()->emitError(Op->getOperationName(0) +
3600 ": argument out of range.");
3601 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3602 }
3603 return SDValue();
3604}
3605
3606SDValue
3607LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3608 SelectionDAG &DAG) const {
3609 switch (Op.getConstantOperandVal(0)) {
3610 default:
3611 return SDValue(); // Don't custom lower most intrinsics.
3612 case Intrinsic::thread_pointer: {
3613 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3614 return DAG.getRegister(LoongArch::R2, PtrVT);
3615 }
3616 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3617 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3618 case Intrinsic::loongarch_lsx_vreplvei_d:
3619 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3620 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3621 case Intrinsic::loongarch_lsx_vreplvei_w:
3622 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3623 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3624 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3625 case Intrinsic::loongarch_lasx_xvpickve_d:
3626 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3627 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3628 case Intrinsic::loongarch_lasx_xvinsve0_d:
3629 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3630 case Intrinsic::loongarch_lsx_vsat_b:
3631 case Intrinsic::loongarch_lsx_vsat_bu:
3632 case Intrinsic::loongarch_lsx_vrotri_b:
3633 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3634 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3635 case Intrinsic::loongarch_lsx_vsrlri_b:
3636 case Intrinsic::loongarch_lsx_vsrari_b:
3637 case Intrinsic::loongarch_lsx_vreplvei_h:
3638 case Intrinsic::loongarch_lasx_xvsat_b:
3639 case Intrinsic::loongarch_lasx_xvsat_bu:
3640 case Intrinsic::loongarch_lasx_xvrotri_b:
3641 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3642 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3643 case Intrinsic::loongarch_lasx_xvsrlri_b:
3644 case Intrinsic::loongarch_lasx_xvsrari_b:
3645 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3646 case Intrinsic::loongarch_lasx_xvpickve_w:
3647 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3648 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3649 case Intrinsic::loongarch_lasx_xvinsve0_w:
3650 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3651 case Intrinsic::loongarch_lsx_vsat_h:
3652 case Intrinsic::loongarch_lsx_vsat_hu:
3653 case Intrinsic::loongarch_lsx_vrotri_h:
3654 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3655 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3656 case Intrinsic::loongarch_lsx_vsrlri_h:
3657 case Intrinsic::loongarch_lsx_vsrari_h:
3658 case Intrinsic::loongarch_lsx_vreplvei_b:
3659 case Intrinsic::loongarch_lasx_xvsat_h:
3660 case Intrinsic::loongarch_lasx_xvsat_hu:
3661 case Intrinsic::loongarch_lasx_xvrotri_h:
3662 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3663 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3664 case Intrinsic::loongarch_lasx_xvsrlri_h:
3665 case Intrinsic::loongarch_lasx_xvsrari_h:
3666 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3667 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3668 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3669 case Intrinsic::loongarch_lsx_vsrani_b_h:
3670 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3671 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3672 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3673 case Intrinsic::loongarch_lsx_vssrani_b_h:
3674 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3675 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3676 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3677 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3678 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3679 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3680 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3681 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3682 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3683 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3684 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3685 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3686 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3687 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3688 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3689 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3690 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3691 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3692 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3693 case Intrinsic::loongarch_lsx_vsat_w:
3694 case Intrinsic::loongarch_lsx_vsat_wu:
3695 case Intrinsic::loongarch_lsx_vrotri_w:
3696 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3697 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3698 case Intrinsic::loongarch_lsx_vsrlri_w:
3699 case Intrinsic::loongarch_lsx_vsrari_w:
3700 case Intrinsic::loongarch_lsx_vslei_bu:
3701 case Intrinsic::loongarch_lsx_vslei_hu:
3702 case Intrinsic::loongarch_lsx_vslei_wu:
3703 case Intrinsic::loongarch_lsx_vslei_du:
3704 case Intrinsic::loongarch_lsx_vslti_bu:
3705 case Intrinsic::loongarch_lsx_vslti_hu:
3706 case Intrinsic::loongarch_lsx_vslti_wu:
3707 case Intrinsic::loongarch_lsx_vslti_du:
3708 case Intrinsic::loongarch_lsx_vbsll_v:
3709 case Intrinsic::loongarch_lsx_vbsrl_v:
3710 case Intrinsic::loongarch_lasx_xvsat_w:
3711 case Intrinsic::loongarch_lasx_xvsat_wu:
3712 case Intrinsic::loongarch_lasx_xvrotri_w:
3713 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3714 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3715 case Intrinsic::loongarch_lasx_xvsrlri_w:
3716 case Intrinsic::loongarch_lasx_xvsrari_w:
3717 case Intrinsic::loongarch_lasx_xvslei_bu:
3718 case Intrinsic::loongarch_lasx_xvslei_hu:
3719 case Intrinsic::loongarch_lasx_xvslei_wu:
3720 case Intrinsic::loongarch_lasx_xvslei_du:
3721 case Intrinsic::loongarch_lasx_xvslti_bu:
3722 case Intrinsic::loongarch_lasx_xvslti_hu:
3723 case Intrinsic::loongarch_lasx_xvslti_wu:
3724 case Intrinsic::loongarch_lasx_xvslti_du:
3725 case Intrinsic::loongarch_lasx_xvbsll_v:
3726 case Intrinsic::loongarch_lasx_xvbsrl_v:
3727 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3728 case Intrinsic::loongarch_lsx_vseqi_b:
3729 case Intrinsic::loongarch_lsx_vseqi_h:
3730 case Intrinsic::loongarch_lsx_vseqi_w:
3731 case Intrinsic::loongarch_lsx_vseqi_d:
3732 case Intrinsic::loongarch_lsx_vslei_b:
3733 case Intrinsic::loongarch_lsx_vslei_h:
3734 case Intrinsic::loongarch_lsx_vslei_w:
3735 case Intrinsic::loongarch_lsx_vslei_d:
3736 case Intrinsic::loongarch_lsx_vslti_b:
3737 case Intrinsic::loongarch_lsx_vslti_h:
3738 case Intrinsic::loongarch_lsx_vslti_w:
3739 case Intrinsic::loongarch_lsx_vslti_d:
3740 case Intrinsic::loongarch_lasx_xvseqi_b:
3741 case Intrinsic::loongarch_lasx_xvseqi_h:
3742 case Intrinsic::loongarch_lasx_xvseqi_w:
3743 case Intrinsic::loongarch_lasx_xvseqi_d:
3744 case Intrinsic::loongarch_lasx_xvslei_b:
3745 case Intrinsic::loongarch_lasx_xvslei_h:
3746 case Intrinsic::loongarch_lasx_xvslei_w:
3747 case Intrinsic::loongarch_lasx_xvslei_d:
3748 case Intrinsic::loongarch_lasx_xvslti_b:
3749 case Intrinsic::loongarch_lasx_xvslti_h:
3750 case Intrinsic::loongarch_lasx_xvslti_w:
3751 case Intrinsic::loongarch_lasx_xvslti_d:
3752 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3753 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3754 case Intrinsic::loongarch_lsx_vsrani_h_w:
3755 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3756 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3757 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3758 case Intrinsic::loongarch_lsx_vssrani_h_w:
3759 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3760 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3761 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3762 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3763 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3764 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3765 case Intrinsic::loongarch_lsx_vfrstpi_b:
3766 case Intrinsic::loongarch_lsx_vfrstpi_h:
3767 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3768 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3769 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3770 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3771 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3772 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3773 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3774 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3775 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3776 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3777 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3778 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3779 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3780 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3781 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3782 case Intrinsic::loongarch_lsx_vsat_d:
3783 case Intrinsic::loongarch_lsx_vsat_du:
3784 case Intrinsic::loongarch_lsx_vrotri_d:
3785 case Intrinsic::loongarch_lsx_vsrlri_d:
3786 case Intrinsic::loongarch_lsx_vsrari_d:
3787 case Intrinsic::loongarch_lasx_xvsat_d:
3788 case Intrinsic::loongarch_lasx_xvsat_du:
3789 case Intrinsic::loongarch_lasx_xvrotri_d:
3790 case Intrinsic::loongarch_lasx_xvsrlri_d:
3791 case Intrinsic::loongarch_lasx_xvsrari_d:
3792 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3793 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3794 case Intrinsic::loongarch_lsx_vsrani_w_d:
3795 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3796 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3797 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3798 case Intrinsic::loongarch_lsx_vssrani_w_d:
3799 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3800 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3801 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3802 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3803 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3804 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3805 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3806 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3807 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3808 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3809 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3810 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3811 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3812 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3813 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3814 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3815 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3816 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3817 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3818 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3819 case Intrinsic::loongarch_lsx_vsrani_d_q:
3820 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3821 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3822 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3823 case Intrinsic::loongarch_lsx_vssrani_d_q:
3824 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3825 case Intrinsic::loongarch_lsx_vssrani_du_q:
3826 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3827 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3828 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3829 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3830 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3831 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3832 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3833 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3834 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3835 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3836 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3837 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3838 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3839 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3840 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3841 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3842 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3843 case Intrinsic::loongarch_lsx_vnori_b:
3844 case Intrinsic::loongarch_lsx_vshuf4i_b:
3845 case Intrinsic::loongarch_lsx_vshuf4i_h:
3846 case Intrinsic::loongarch_lsx_vshuf4i_w:
3847 case Intrinsic::loongarch_lasx_xvnori_b:
3848 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3849 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3850 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3851 case Intrinsic::loongarch_lasx_xvpermi_d:
3852 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3853 case Intrinsic::loongarch_lsx_vshuf4i_d:
3854 case Intrinsic::loongarch_lsx_vpermi_w:
3855 case Intrinsic::loongarch_lsx_vbitseli_b:
3856 case Intrinsic::loongarch_lsx_vextrins_b:
3857 case Intrinsic::loongarch_lsx_vextrins_h:
3858 case Intrinsic::loongarch_lsx_vextrins_w:
3859 case Intrinsic::loongarch_lsx_vextrins_d:
3860 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3861 case Intrinsic::loongarch_lasx_xvpermi_w:
3862 case Intrinsic::loongarch_lasx_xvpermi_q:
3863 case Intrinsic::loongarch_lasx_xvbitseli_b:
3864 case Intrinsic::loongarch_lasx_xvextrins_b:
3865 case Intrinsic::loongarch_lasx_xvextrins_h:
3866 case Intrinsic::loongarch_lasx_xvextrins_w:
3867 case Intrinsic::loongarch_lasx_xvextrins_d:
3868 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3869 case Intrinsic::loongarch_lsx_vrepli_b:
3870 case Intrinsic::loongarch_lsx_vrepli_h:
3871 case Intrinsic::loongarch_lsx_vrepli_w:
3872 case Intrinsic::loongarch_lsx_vrepli_d:
3873 case Intrinsic::loongarch_lasx_xvrepli_b:
3874 case Intrinsic::loongarch_lasx_xvrepli_h:
3875 case Intrinsic::loongarch_lasx_xvrepli_w:
3876 case Intrinsic::loongarch_lasx_xvrepli_d:
3877 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3878 case Intrinsic::loongarch_lsx_vldi:
3879 case Intrinsic::loongarch_lasx_xvldi:
3880 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3881 }
3882}
3883
3884// Helper function that emits an error message for intrinsics with a chain and
3885// returns the merged values of an UNDEF and the chain.
3886static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3887                                                  StringRef ErrorMsg,
3888 SelectionDAG &DAG) {
3889 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3890 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3891 SDLoc(Op));
3892}
3893
3894SDValue
3895LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3896 SelectionDAG &DAG) const {
3897 SDLoc DL(Op);
3898 MVT GRLenVT = Subtarget.getGRLenVT();
3899 EVT VT = Op.getValueType();
3900 SDValue Chain = Op.getOperand(0);
3901 const StringRef ErrorMsgOOR = "argument out of range";
3902 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3903 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3904
3905 switch (Op.getConstantOperandVal(1)) {
3906 default:
3907 return Op;
3908 case Intrinsic::loongarch_crc_w_b_w:
3909 case Intrinsic::loongarch_crc_w_h_w:
3910 case Intrinsic::loongarch_crc_w_w_w:
3911 case Intrinsic::loongarch_crc_w_d_w:
3912 case Intrinsic::loongarch_crcc_w_b_w:
3913 case Intrinsic::loongarch_crcc_w_h_w:
3914 case Intrinsic::loongarch_crcc_w_w_w:
3915 case Intrinsic::loongarch_crcc_w_d_w:
3916 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3917 case Intrinsic::loongarch_csrrd_w:
3918 case Intrinsic::loongarch_csrrd_d: {
3919 unsigned Imm = Op.getConstantOperandVal(2);
3920 return !isUInt<14>(Imm)
3921 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3922 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3923 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3924 }
3925 case Intrinsic::loongarch_csrwr_w:
3926 case Intrinsic::loongarch_csrwr_d: {
3927 unsigned Imm = Op.getConstantOperandVal(3);
3928 return !isUInt<14>(Imm)
3929 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3930 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3931 {Chain, Op.getOperand(2),
3932 DAG.getConstant(Imm, DL, GRLenVT)});
3933 }
3934 case Intrinsic::loongarch_csrxchg_w:
3935 case Intrinsic::loongarch_csrxchg_d: {
3936 unsigned Imm = Op.getConstantOperandVal(4);
3937 return !isUInt<14>(Imm)
3938 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3939 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3940 {Chain, Op.getOperand(2), Op.getOperand(3),
3941 DAG.getConstant(Imm, DL, GRLenVT)});
3942 }
3943 case Intrinsic::loongarch_iocsrrd_d: {
3944 return DAG.getNode(
3945 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3946 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3947 }
3948#define IOCSRRD_CASE(NAME, NODE) \
3949 case Intrinsic::loongarch_##NAME: { \
3950 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3951 {Chain, Op.getOperand(2)}); \
3952 }
3953 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3954 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3955 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3956#undef IOCSRRD_CASE
3957 case Intrinsic::loongarch_cpucfg: {
3958 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3959 {Chain, Op.getOperand(2)});
3960 }
3961 case Intrinsic::loongarch_lddir_d: {
3962 unsigned Imm = Op.getConstantOperandVal(3);
3963 return !isUInt<8>(Imm)
3964 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3965 : Op;
3966 }
3967 case Intrinsic::loongarch_movfcsr2gr: {
3968 if (!Subtarget.hasBasicF())
3969 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3970 unsigned Imm = Op.getConstantOperandVal(2);
3971 return !isUInt<2>(Imm)
3972 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3973 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3974 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3975 }
3976 case Intrinsic::loongarch_lsx_vld:
3977 case Intrinsic::loongarch_lsx_vldrepl_b:
3978 case Intrinsic::loongarch_lasx_xvld:
3979 case Intrinsic::loongarch_lasx_xvldrepl_b:
3980 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3981 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3982 : SDValue();
3983 case Intrinsic::loongarch_lsx_vldrepl_h:
3984 case Intrinsic::loongarch_lasx_xvldrepl_h:
3985 return !isShiftedInt<11, 1>(
3986 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3987               ? emitIntrinsicWithChainErrorMessage(
3988                     Op, "argument out of range or not a multiple of 2", DAG)
3989 : SDValue();
3990 case Intrinsic::loongarch_lsx_vldrepl_w:
3991 case Intrinsic::loongarch_lasx_xvldrepl_w:
3992 return !isShiftedInt<10, 2>(
3993 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3994               ? emitIntrinsicWithChainErrorMessage(
3995                     Op, "argument out of range or not a multiple of 4", DAG)
3996 : SDValue();
3997 case Intrinsic::loongarch_lsx_vldrepl_d:
3998 case Intrinsic::loongarch_lasx_xvldrepl_d:
3999 return !isShiftedInt<9, 3>(
4000 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4001               ? emitIntrinsicWithChainErrorMessage(
4002                     Op, "argument out of range or not a multiple of 8", DAG)
4003 : SDValue();
4004 }
4005}
4006
4007// Helper function that emits an error message for intrinsics with a void
4008// return value and returns the chain.
4009static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4010                                         SelectionDAG &DAG) {
4011
4012 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4013 return Op.getOperand(0);
4014}
4015
4016SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4017 SelectionDAG &DAG) const {
4018 SDLoc DL(Op);
4019 MVT GRLenVT = Subtarget.getGRLenVT();
4020 SDValue Chain = Op.getOperand(0);
4021 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4022 SDValue Op2 = Op.getOperand(2);
4023 const StringRef ErrorMsgOOR = "argument out of range";
4024 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4025 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4026 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4027
4028 switch (IntrinsicEnum) {
4029 default:
4030 // TODO: Add more Intrinsics.
4031 return SDValue();
4032 case Intrinsic::loongarch_cacop_d:
4033 case Intrinsic::loongarch_cacop_w: {
4034 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4035 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4036 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4037 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4038 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4039 unsigned Imm1 = Op2->getAsZExtVal();
4040 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4041 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4042 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4043 return Op;
4044 }
4045 case Intrinsic::loongarch_dbar: {
4046 unsigned Imm = Op2->getAsZExtVal();
4047 return !isUInt<15>(Imm)
4048 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4049 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4050 DAG.getConstant(Imm, DL, GRLenVT));
4051 }
4052 case Intrinsic::loongarch_ibar: {
4053 unsigned Imm = Op2->getAsZExtVal();
4054 return !isUInt<15>(Imm)
4055 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4056 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4057 DAG.getConstant(Imm, DL, GRLenVT));
4058 }
4059 case Intrinsic::loongarch_break: {
4060 unsigned Imm = Op2->getAsZExtVal();
4061 return !isUInt<15>(Imm)
4062 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4063 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4064 DAG.getConstant(Imm, DL, GRLenVT));
4065 }
4066 case Intrinsic::loongarch_movgr2fcsr: {
4067 if (!Subtarget.hasBasicF())
4068 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4069 unsigned Imm = Op2->getAsZExtVal();
4070 return !isUInt<2>(Imm)
4071 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4072 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4073 DAG.getConstant(Imm, DL, GRLenVT),
4074 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4075 Op.getOperand(3)));
4076 }
4077 case Intrinsic::loongarch_syscall: {
4078 unsigned Imm = Op2->getAsZExtVal();
4079 return !isUInt<15>(Imm)
4080 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4081 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4082 DAG.getConstant(Imm, DL, GRLenVT));
4083 }
4084#define IOCSRWR_CASE(NAME, NODE) \
4085 case Intrinsic::loongarch_##NAME: { \
4086 SDValue Op3 = Op.getOperand(3); \
4087 return Subtarget.is64Bit() \
4088 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4089 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4090 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4091 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4092 Op3); \
4093 }
4094 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4095 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4096 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4097#undef IOCSRWR_CASE
4098 case Intrinsic::loongarch_iocsrwr_d: {
4099 return !Subtarget.is64Bit()
4100 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4101 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4102 Op2,
4103 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4104 Op.getOperand(3)));
4105 }
4106#define ASRT_LE_GT_CASE(NAME) \
4107 case Intrinsic::loongarch_##NAME: { \
4108 return !Subtarget.is64Bit() \
4109 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4110 : Op; \
4111 }
4112 ASRT_LE_GT_CASE(asrtle_d)
4113 ASRT_LE_GT_CASE(asrtgt_d)
4114#undef ASRT_LE_GT_CASE
4115 case Intrinsic::loongarch_ldpte_d: {
4116 unsigned Imm = Op.getConstantOperandVal(3);
4117 return !Subtarget.is64Bit()
4118 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4119 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4120 : Op;
4121 }
4122 case Intrinsic::loongarch_lsx_vst:
4123 case Intrinsic::loongarch_lasx_xvst:
4124 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4125 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4126 : SDValue();
4127 case Intrinsic::loongarch_lasx_xvstelm_b:
4128 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4129 !isUInt<5>(Op.getConstantOperandVal(5)))
4130 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4131 : SDValue();
4132 case Intrinsic::loongarch_lsx_vstelm_b:
4133 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4134 !isUInt<4>(Op.getConstantOperandVal(5)))
4135 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4136 : SDValue();
4137 case Intrinsic::loongarch_lasx_xvstelm_h:
4138 return (!isShiftedInt<8, 1>(
4139 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4140 !isUInt<4>(Op.getConstantOperandVal(5)))
4141               ? emitIntrinsicErrorMessage(
4142                     Op, "argument out of range or not a multiple of 2", DAG)
4143 : SDValue();
4144 case Intrinsic::loongarch_lsx_vstelm_h:
4145 return (!isShiftedInt<8, 1>(
4146 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4147 !isUInt<3>(Op.getConstantOperandVal(5)))
4148               ? emitIntrinsicErrorMessage(
4149                     Op, "argument out of range or not a multiple of 2", DAG)
4150 : SDValue();
4151 case Intrinsic::loongarch_lasx_xvstelm_w:
4152 return (!isShiftedInt<8, 2>(
4153 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4154 !isUInt<3>(Op.getConstantOperandVal(5)))
4155               ? emitIntrinsicErrorMessage(
4156                     Op, "argument out of range or not a multiple of 4", DAG)
4157 : SDValue();
4158 case Intrinsic::loongarch_lsx_vstelm_w:
4159 return (!isShiftedInt<8, 2>(
4160 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4161 !isUInt<2>(Op.getConstantOperandVal(5)))
4162               ? emitIntrinsicErrorMessage(
4163                     Op, "argument out of range or not a multiple of 4", DAG)
4164 : SDValue();
4165 case Intrinsic::loongarch_lasx_xvstelm_d:
4166 return (!isShiftedInt<8, 3>(
4167 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4168 !isUInt<2>(Op.getConstantOperandVal(5)))
4169               ? emitIntrinsicErrorMessage(
4170                     Op, "argument out of range or not a multiple of 8", DAG)
4171 : SDValue();
4172 case Intrinsic::loongarch_lsx_vstelm_d:
4173 return (!isShiftedInt<8, 3>(
4174 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4175 !isUInt<1>(Op.getConstantOperandVal(5)))
4176               ? emitIntrinsicErrorMessage(
4177                     Op, "argument out of range or not a multiple of 8", DAG)
4178 : SDValue();
4179 }
4180}
4181
4182SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4183 SelectionDAG &DAG) const {
4184 SDLoc DL(Op);
4185 SDValue Lo = Op.getOperand(0);
4186 SDValue Hi = Op.getOperand(1);
4187 SDValue Shamt = Op.getOperand(2);
4188 EVT VT = Lo.getValueType();
4189
4190 // if Shamt-GRLen < 0: // Shamt < GRLen
4191 // Lo = Lo << Shamt
4192 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4193 // else:
4194 // Lo = 0
4195 // Hi = Lo << (Shamt-GRLen)
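  // Worked example (illustrative only, GRLen = 32): for Shamt = 8,
  //   GRLen-1 ^ Shamt = 23, so
  //   Hi = (Hi << 8) | ((Lo >>u 1) >>u 23) = (Hi << 8) | (Lo >>u 24),
  // i.e. the top 8 bits of Lo move into the low bits of Hi. For Shamt = 40,
  // Lo = 0 and Hi = Lo << 8.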
4196
4197 SDValue Zero = DAG.getConstant(0, DL, VT);
4198 SDValue One = DAG.getConstant(1, DL, VT);
4199 SDValue MinusGRLen =
4200 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4201 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4202 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4203 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4204
4205 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4206 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4207 SDValue ShiftRightLo =
4208 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4209 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4210 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4211 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4212
4213 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4214
4215 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4216 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4217
4218 SDValue Parts[2] = {Lo, Hi};
4219 return DAG.getMergeValues(Parts, DL);
4220}
4221
4222SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4223 SelectionDAG &DAG,
4224 bool IsSRA) const {
4225 SDLoc DL(Op);
4226 SDValue Lo = Op.getOperand(0);
4227 SDValue Hi = Op.getOperand(1);
4228 SDValue Shamt = Op.getOperand(2);
4229 EVT VT = Lo.getValueType();
4230
4231 // SRA expansion:
4232 // if Shamt-GRLen < 0: // Shamt < GRLen
4233 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4234 // Hi = Hi >>s Shamt
4235 // else:
4236 // Lo = Hi >>s (Shamt-GRLen);
4237 // Hi = Hi >>s (GRLen-1)
4238 //
4239 // SRL expansion:
4240 // if Shamt-GRLen < 0: // Shamt < GRLen
4241 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4242 // Hi = Hi >>u Shamt
4243 // else:
4244 // Lo = Hi >>u (Shamt-GRLen);
4245 // Hi = 0;
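  // Worked example (illustrative only, GRLen = 32): for an SRL with Shamt = 8,
  //   Lo = (Lo >>u 8) | ((Hi << 1) << 23) = (Lo >>u 8) | (Hi << 24) and
  //   Hi = Hi >>u 8;
  // for Shamt = 40, Lo = Hi >>u 8 and Hi = 0.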
4246
4247 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4248
4249 SDValue Zero = DAG.getConstant(0, DL, VT);
4250 SDValue One = DAG.getConstant(1, DL, VT);
4251 SDValue MinusGRLen =
4252 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4253 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4254 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4255 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4256
4257 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4258 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4259 SDValue ShiftLeftHi =
4260 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4261 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4262 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4263 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4264 SDValue HiFalse =
4265 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4266
4267 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4268
4269 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4270 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4271
4272 SDValue Parts[2] = {Lo, Hi};
4273 return DAG.getMergeValues(Parts, DL);
4274}
4275
4276// Returns the opcode of the target-specific SDNode that implements the 32-bit
4277// form of the given Opcode.
4278static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4279  switch (Opcode) {
4280 default:
4281 llvm_unreachable("Unexpected opcode");
4282 case ISD::SDIV:
4283 return LoongArchISD::DIV_W;
4284 case ISD::UDIV:
4285 return LoongArchISD::DIV_WU;
4286 case ISD::SREM:
4287 return LoongArchISD::MOD_W;
4288 case ISD::UREM:
4289 return LoongArchISD::MOD_WU;
4290 case ISD::SHL:
4291 return LoongArchISD::SLL_W;
4292 case ISD::SRA:
4293 return LoongArchISD::SRA_W;
4294 case ISD::SRL:
4295 return LoongArchISD::SRL_W;
4296 case ISD::ROTL:
4297 case ISD::ROTR:
4298 return LoongArchISD::ROTR_W;
4299 case ISD::CTTZ:
4300 return LoongArchISD::CTZ_W;
4301 case ISD::CTLZ:
4302 return LoongArchISD::CLZ_W;
4303 }
4304}
4305
4306// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4307// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4308// otherwise be promoted to i64, making it difficult to select the
4309// SLL_W/.../*W later on, because the fact that the operation was originally of
4310// type i8/i16/i32 is lost.
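// For example (illustrative), an i32 'sra' on LA64 is rebuilt here as
//   (trunc (SRA_W (any_extend $a), (any_extend $b)))
// so that the 32-bit form remains selectable.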
4311static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4312                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
4313 SDLoc DL(N);
4314 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4315 SDValue NewOp0, NewRes;
4316
4317 switch (NumOp) {
4318 default:
4319 llvm_unreachable("Unexpected NumOp");
4320 case 1: {
4321 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4322 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4323 break;
4324 }
4325 case 2: {
4326 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4327 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4328 if (N->getOpcode() == ISD::ROTL) {
4329 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4330 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4331 }
4332 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4333 break;
4334 }
4335    // TODO: Handle more NumOp.
4336 }
4337
4338 // ReplaceNodeResults requires we maintain the same type for the return
4339 // value.
4340 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4341}
4342
4343// Converts the given 32-bit operation to an i64 operation with sign-extension
4344// semantics, to reduce the number of sign-extension instructions.
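// For example (illustrative), an i32 'add' on LA64 becomes
//   (trunc (sext_inreg (add (any_extend $a), (any_extend $b)), i32)),
// which the 32-bit instruction patterns can then match as add.w.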
4345static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4346  SDLoc DL(N);
4347 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4348 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4349 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4350 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4351 DAG.getValueType(MVT::i32));
4352 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4353}
4354
4355// Helper function that emits an error message for intrinsics with/without a
4356// chain and pushes an UNDEF and, if requested, the chain as the results.
4357static void emitErrorAndReplaceIntrinsicResults(
4358    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4359    StringRef ErrorMsg, bool WithChain = true) {
4360 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4361 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4362 if (!WithChain)
4363 return;
4364 Results.push_back(N->getOperand(0));
4365}
4366
4367template <unsigned N>
4368static void
4369replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4370                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4371 unsigned ResOp) {
4372 const StringRef ErrorMsgOOR = "argument out of range";
4373 unsigned Imm = Node->getConstantOperandVal(2);
4374 if (!isUInt<N>(Imm)) {
4375    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4376                                        /*WithChain=*/false);
4377 return;
4378 }
4379 SDLoc DL(Node);
4380 SDValue Vec = Node->getOperand(1);
4381
4382 SDValue PickElt =
4383 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4384 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4385                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
4386  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4387 PickElt.getValue(0)));
4388}
4389
4390static void replaceVecCondBranchResults(SDNode *N,
4391                                        SmallVectorImpl<SDValue> &Results,
4392                                        SelectionDAG &DAG,
4393 const LoongArchSubtarget &Subtarget,
4394 unsigned ResOp) {
4395 SDLoc DL(N);
4396 SDValue Vec = N->getOperand(1);
4397
4398 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4399 Results.push_back(
4400 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4401}
4402
4403static void
4404replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4405                                 SelectionDAG &DAG,
4406 const LoongArchSubtarget &Subtarget) {
4407 switch (N->getConstantOperandVal(0)) {
4408 default:
4409 llvm_unreachable("Unexpected Intrinsic.");
4410 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4411 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4412                                LoongArchISD::VPICK_SEXT_ELT);
4413    break;
4414 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4415 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4416 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4417                                LoongArchISD::VPICK_SEXT_ELT);
4418    break;
4419 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4420 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4421                                LoongArchISD::VPICK_SEXT_ELT);
4422    break;
4423 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4424 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4425                                LoongArchISD::VPICK_ZEXT_ELT);
4426    break;
4427 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4428 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4429 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4430                                LoongArchISD::VPICK_ZEXT_ELT);
4431    break;
4432 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4433 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4434                                LoongArchISD::VPICK_ZEXT_ELT);
4435    break;
4436 case Intrinsic::loongarch_lsx_bz_b:
4437 case Intrinsic::loongarch_lsx_bz_h:
4438 case Intrinsic::loongarch_lsx_bz_w:
4439 case Intrinsic::loongarch_lsx_bz_d:
4440 case Intrinsic::loongarch_lasx_xbz_b:
4441 case Intrinsic::loongarch_lasx_xbz_h:
4442 case Intrinsic::loongarch_lasx_xbz_w:
4443 case Intrinsic::loongarch_lasx_xbz_d:
4444 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4446 break;
4447 case Intrinsic::loongarch_lsx_bz_v:
4448 case Intrinsic::loongarch_lasx_xbz_v:
4449 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4451 break;
4452 case Intrinsic::loongarch_lsx_bnz_b:
4453 case Intrinsic::loongarch_lsx_bnz_h:
4454 case Intrinsic::loongarch_lsx_bnz_w:
4455 case Intrinsic::loongarch_lsx_bnz_d:
4456 case Intrinsic::loongarch_lasx_xbnz_b:
4457 case Intrinsic::loongarch_lasx_xbnz_h:
4458 case Intrinsic::loongarch_lasx_xbnz_w:
4459 case Intrinsic::loongarch_lasx_xbnz_d:
4460 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4462 break;
4463 case Intrinsic::loongarch_lsx_bnz_v:
4464 case Intrinsic::loongarch_lasx_xbnz_v:
4465 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4467 break;
4468 }
4469}
4470
4471static void replaceCMP_XCHG_128Results(SDNode *N,
4472                                       SmallVectorImpl<SDValue> &Results,
4473                                       SelectionDAG &DAG) {
4474 assert(N->getValueType(0) == MVT::i128 &&
4475 "AtomicCmpSwap on types less than 128 should be legal");
4476 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4477
4478 unsigned Opcode;
4479 switch (MemOp->getMergedOrdering()) {
4480  case AtomicOrdering::Acquire:
4481  case AtomicOrdering::AcquireRelease:
4482  case AtomicOrdering::SequentiallyConsistent:
4483    Opcode = LoongArch::PseudoCmpXchg128Acquire;
4484 break;
4485  case AtomicOrdering::Monotonic:
4486  case AtomicOrdering::Release:
4487    Opcode = LoongArch::PseudoCmpXchg128;
4488 break;
4489 default:
4490 llvm_unreachable("Unexpected ordering!");
4491 }
4492
4493 SDLoc DL(N);
4494 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4495 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4496 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4497 NewVal.first, NewVal.second, N->getOperand(0)};
4498
4499 SDNode *CmpSwap = DAG.getMachineNode(
4500 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4501 Ops);
4502 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4503 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4504 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4505 Results.push_back(SDValue(CmpSwap, 3));
4506}
4507
4508void LoongArchTargetLowering::ReplaceNodeResults(
4509    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4510  SDLoc DL(N);
4511 EVT VT = N->getValueType(0);
4512 switch (N->getOpcode()) {
4513 default:
4514 llvm_unreachable("Don't know how to legalize this operation");
4515 case ISD::ADD:
4516 case ISD::SUB:
4517 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4518 "Unexpected custom legalisation");
4519 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4520 break;
4521 case ISD::SDIV:
4522 case ISD::UDIV:
4523 case ISD::SREM:
4524 case ISD::UREM:
4525 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4526 "Unexpected custom legalisation");
4527 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4528 Subtarget.hasDiv32() && VT == MVT::i32
4529                                              ? ISD::ANY_EXTEND
4530                                              : ISD::SIGN_EXTEND));
4531 break;
4532 case ISD::SHL:
4533 case ISD::SRA:
4534 case ISD::SRL:
4535 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4536 "Unexpected custom legalisation");
4537 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4538 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4539 break;
4540 }
4541 break;
4542 case ISD::ROTL:
4543 case ISD::ROTR:
4544 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4545 "Unexpected custom legalisation");
4546 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4547 break;
4548 case ISD::FP_TO_SINT: {
4549 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4550 "Unexpected custom legalisation");
4551 SDValue Src = N->getOperand(0);
4552 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4553 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4554            TargetLowering::TypeSoftenFloat) {
4555      if (!isTypeLegal(Src.getValueType()))
4556 return;
4557 if (Src.getValueType() == MVT::f16)
4558 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4559 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4560 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4561 return;
4562 }
4563 // If the FP type needs to be softened, emit a library call using the 'si'
4564 // version. If we left it to default legalization we'd end up with 'di'.
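    // (For instance, f32 -> i32 would use something like __fixsfsi rather than
    // __fixsfdi; the exact libcall is whatever RTLIB::getFPTOSINT selects.)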
4565 RTLIB::Libcall LC;
4566 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4567 MakeLibCallOptions CallOptions;
4568 EVT OpVT = Src.getValueType();
4569 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4570 SDValue Chain = SDValue();
4571 SDValue Result;
4572 std::tie(Result, Chain) =
4573 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4574 Results.push_back(Result);
4575 break;
4576 }
4577 case ISD::BITCAST: {
4578 SDValue Src = N->getOperand(0);
4579 EVT SrcVT = Src.getValueType();
4580 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4581 Subtarget.hasBasicF()) {
4582 SDValue Dst =
4583 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4584 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4585 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4587 DAG.getVTList(MVT::i32, MVT::i32), Src);
4588 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4589 NewReg.getValue(0), NewReg.getValue(1));
4590 Results.push_back(RetReg);
4591 }
4592 break;
4593 }
4594 case ISD::FP_TO_UINT: {
4595 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4596 "Unexpected custom legalisation");
4597 auto &TLI = DAG.getTargetLoweringInfo();
4598 SDValue Tmp1, Tmp2;
4599 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4600 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4601 break;
4602 }
4603 case ISD::BSWAP: {
4604 SDValue Src = N->getOperand(0);
4605 assert((VT == MVT::i16 || VT == MVT::i32) &&
4606 "Unexpected custom legalization");
4607 MVT GRLenVT = Subtarget.getGRLenVT();
4608 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4609 SDValue Tmp;
4610 switch (VT.getSizeInBits()) {
4611 default:
4612 llvm_unreachable("Unexpected operand width");
4613 case 16:
4614 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4615 break;
4616 case 32:
4617      // Only LA64 will get to here due to the size mismatch between VT and
4618      // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4619 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4620 break;
4621 }
4622 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4623 break;
4624 }
4625 case ISD::BITREVERSE: {
4626 SDValue Src = N->getOperand(0);
4627 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4628 "Unexpected custom legalization");
4629 MVT GRLenVT = Subtarget.getGRLenVT();
4630 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4631 SDValue Tmp;
4632 switch (VT.getSizeInBits()) {
4633 default:
4634 llvm_unreachable("Unexpected operand width");
4635 case 8:
4636 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4637 break;
4638 case 32:
4639 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4640 break;
4641 }
4642 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4643 break;
4644 }
4645 case ISD::CTLZ:
4646 case ISD::CTTZ: {
4647 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4648 "Unexpected custom legalisation");
4649 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4650 break;
4651 }
4652  case ISD::INTRINSIC_W_CHAIN: {
4653    SDValue Chain = N->getOperand(0);
4654 SDValue Op2 = N->getOperand(2);
4655 MVT GRLenVT = Subtarget.getGRLenVT();
4656 const StringRef ErrorMsgOOR = "argument out of range";
4657 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4658 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4659
4660 switch (N->getConstantOperandVal(1)) {
4661 default:
4662 llvm_unreachable("Unexpected Intrinsic.");
4663 case Intrinsic::loongarch_movfcsr2gr: {
4664 if (!Subtarget.hasBasicF()) {
4665 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4666 return;
4667 }
4668 unsigned Imm = Op2->getAsZExtVal();
4669 if (!isUInt<2>(Imm)) {
4670 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4671 return;
4672 }
4673 SDValue MOVFCSR2GRResults = DAG.getNode(
4674 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4675 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4676 Results.push_back(
4677 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4678 Results.push_back(MOVFCSR2GRResults.getValue(1));
4679 break;
4680 }
4681#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4682 case Intrinsic::loongarch_##NAME: { \
4683 SDValue NODE = DAG.getNode( \
4684 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4685 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4686 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4687 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4688 Results.push_back(NODE.getValue(1)); \
4689 break; \
4690 }
4691 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4692 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4693 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4694 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4695 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4696 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4697#undef CRC_CASE_EXT_BINARYOP
4698
4699#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4700 case Intrinsic::loongarch_##NAME: { \
4701 SDValue NODE = DAG.getNode( \
4702 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4703 {Chain, Op2, \
4704 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4705 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4706 Results.push_back(NODE.getValue(1)); \
4707 break; \
4708 }
4709 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4710 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4711#undef CRC_CASE_EXT_UNARYOP
4712#define CSR_CASE(ID) \
4713 case Intrinsic::loongarch_##ID: { \
4714 if (!Subtarget.is64Bit()) \
4715 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4716 break; \
4717 }
4718 CSR_CASE(csrrd_d);
4719 CSR_CASE(csrwr_d);
4720 CSR_CASE(csrxchg_d);
4721 CSR_CASE(iocsrrd_d);
4722#undef CSR_CASE
4723 case Intrinsic::loongarch_csrrd_w: {
4724 unsigned Imm = Op2->getAsZExtVal();
4725 if (!isUInt<14>(Imm)) {
4726 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4727 return;
4728 }
4729 SDValue CSRRDResults =
4730 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4731 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4732 Results.push_back(
4733 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4734 Results.push_back(CSRRDResults.getValue(1));
4735 break;
4736 }
4737 case Intrinsic::loongarch_csrwr_w: {
4738 unsigned Imm = N->getConstantOperandVal(3);
4739 if (!isUInt<14>(Imm)) {
4740 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4741 return;
4742 }
4743 SDValue CSRWRResults =
4744 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4745 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4746 DAG.getConstant(Imm, DL, GRLenVT)});
4747 Results.push_back(
4748 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4749 Results.push_back(CSRWRResults.getValue(1));
4750 break;
4751 }
4752 case Intrinsic::loongarch_csrxchg_w: {
4753 unsigned Imm = N->getConstantOperandVal(4);
4754 if (!isUInt<14>(Imm)) {
4755 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4756 return;
4757 }
4758 SDValue CSRXCHGResults = DAG.getNode(
4759 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4760 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4761 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4762 DAG.getConstant(Imm, DL, GRLenVT)});
4763 Results.push_back(
4764 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4765 Results.push_back(CSRXCHGResults.getValue(1));
4766 break;
4767 }
4768#define IOCSRRD_CASE(NAME, NODE) \
4769 case Intrinsic::loongarch_##NAME: { \
4770 SDValue IOCSRRDResults = \
4771 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4772 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4773 Results.push_back( \
4774 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4775 Results.push_back(IOCSRRDResults.getValue(1)); \
4776 break; \
4777 }
4778 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4779 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4780 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4781#undef IOCSRRD_CASE
4782 case Intrinsic::loongarch_cpucfg: {
4783 SDValue CPUCFGResults =
4784 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4785 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4786 Results.push_back(
4787 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4788 Results.push_back(CPUCFGResults.getValue(1));
4789 break;
4790 }
4791 case Intrinsic::loongarch_lddir_d: {
4792 if (!Subtarget.is64Bit()) {
4793 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4794 return;
4795 }
4796 break;
4797 }
4798 }
4799 break;
4800 }
4801 case ISD::READ_REGISTER: {
4802 if (Subtarget.is64Bit())
4803 DAG.getContext()->emitError(
4804 "On LA64, only 64-bit registers can be read.");
4805 else
4806 DAG.getContext()->emitError(
4807 "On LA32, only 32-bit registers can be read.");
4808 Results.push_back(DAG.getUNDEF(VT));
4809 Results.push_back(N->getOperand(0));
4810 break;
4811 }
4812  case ISD::INTRINSIC_WO_CHAIN: {
4813    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4814 break;
4815 }
4816 case ISD::LROUND: {
4817 SDValue Op0 = N->getOperand(0);
4818 EVT OpVT = Op0.getValueType();
4819 RTLIB::Libcall LC =
4820 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4821 MakeLibCallOptions CallOptions;
4822 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4823 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4824 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4825 Results.push_back(Result);
4826 break;
4827 }
4828 case ISD::ATOMIC_CMP_SWAP: {
4829    replaceCMP_XCHG_128Results(N, Results, DAG);
4830    break;
4831 }
4832 case ISD::TRUNCATE: {
4833 MVT VT = N->getSimpleValueType(0);
4834 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4835 return;
4836
4837 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4838 SDValue In = N->getOperand(0);
4839 EVT InVT = In.getValueType();
4840 EVT InEltVT = InVT.getVectorElementType();
4841 EVT EltVT = VT.getVectorElementType();
4842 unsigned MinElts = VT.getVectorNumElements();
4843 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4844 unsigned InBits = InVT.getSizeInBits();
4845
4846 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4847 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4848 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4849 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4850 for (unsigned I = 0; I < MinElts; ++I)
4851 TruncMask[I] = Scale * I;
4852
4853 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4854 MVT SVT = In.getSimpleValueType().getScalarType();
4855 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4856 SDValue WidenIn =
4857 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4858 DAG.getVectorIdxConstant(0, DL));
4859 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4860 "Illegal vector type in truncation");
4861 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4862 Results.push_back(
4863 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4864 return;
4865 }
4866 }
4867
4868 break;
4869 }
4870 }
4871}
4872
4873static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4874                                 TargetLowering::DAGCombinerInfo &DCI,
4875                                 const LoongArchSubtarget &Subtarget) {
4876 if (DCI.isBeforeLegalizeOps())
4877 return SDValue();
4878
4879 SDValue FirstOperand = N->getOperand(0);
4880 SDValue SecondOperand = N->getOperand(1);
4881 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4882 EVT ValTy = N->getValueType(0);
4883 SDLoc DL(N);
4884 uint64_t lsb, msb;
4885 unsigned SMIdx, SMLen;
4886 ConstantSDNode *CN;
4887 SDValue NewOperand;
4888 MVT GRLenVT = Subtarget.getGRLenVT();
4889
4890 // BSTRPICK requires the 32S feature.
4891 if (!Subtarget.has32S())
4892 return SDValue();
4893
4894 // Op's second operand must be a shifted mask.
4895 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4896 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4897 return SDValue();
4898
4899 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4900 // Pattern match BSTRPICK.
4901 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4902 // => BSTRPICK $dst, $src, msb, lsb
4903 // where msb = lsb + len - 1
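    // Worked example (illustrative only):
    //   $dst = and (srl $src, 4), 0xff
    // has lsb = 4, len = 8, msb = 11 and becomes BSTRPICK $dst, $src, 11, 4.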
4904
4905 // The second operand of the shift must be an immediate.
4906 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4907 return SDValue();
4908
4909 lsb = CN->getZExtValue();
4910
4911 // Return if the shifted mask does not start at bit 0 or the sum of its
4912 // length and lsb exceeds the word's size.
4913 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4914 return SDValue();
4915
4916 NewOperand = FirstOperand.getOperand(0);
4917 } else {
4918 // Pattern match BSTRPICK.
4919    // $dst = and $src, (2**len - 1), if len > 12
4920 // => BSTRPICK $dst, $src, msb, lsb
4921 // where lsb = 0 and msb = len - 1
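    // Worked example (illustrative only):
    //   $dst = and $src, 0x3ffff   (len = 18 > 12, lsb = 0)
    // becomes BSTRPICK $dst, $src, 17, 0.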
4922
4923 // If the mask is <= 0xfff, andi can be used instead.
4924 if (CN->getZExtValue() <= 0xfff)
4925 return SDValue();
4926
4927    // Return if the MSB would exceed the width of the value type.
4928 if (SMIdx + SMLen > ValTy.getSizeInBits())
4929 return SDValue();
4930
4931 if (SMIdx > 0) {
4932      // Omit if the constant has more than 2 uses. This is a conservative
4933 // decision. Whether it is a win depends on the HW microarchitecture.
4934 // However it should always be better for 1 and 2 uses.
4935 if (CN->use_size() > 2)
4936 return SDValue();
4937 // Return if the constant can be composed by a single LU12I.W.
4938 if ((CN->getZExtValue() & 0xfff) == 0)
4939 return SDValue();
4940      // Return if the constant can be composed by a single ADDI with
4941 // the zero register.
4942 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4943 return SDValue();
4944 }
4945
4946 lsb = SMIdx;
4947 NewOperand = FirstOperand;
4948 }
4949
4950 msb = lsb + SMLen - 1;
4951 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4952 DAG.getConstant(msb, DL, GRLenVT),
4953 DAG.getConstant(lsb, DL, GRLenVT));
4954 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4955 return NR0;
4956 // Try to optimize to
4957 // bstrpick $Rd, $Rs, msb, lsb
4958 // slli $Rd, $Rd, lsb
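 // Illustrative example: for (and $src, 0xff00), SMIdx = 8 and SMLen = 8, so
 // the result is bstrpick $Rd, $Rs, 15, 8 followed by slli $Rd, $Rd, 8.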
4959 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4960 DAG.getConstant(lsb, DL, GRLenVT));
4961}
4962
4963static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4964 TargetLowering::DAGCombinerInfo &DCI,
4965 const LoongArchSubtarget &Subtarget) {
4966 // BSTRPICK requires the 32S feature.
4967 if (!Subtarget.has32S())
4968 return SDValue();
4969
4970 if (DCI.isBeforeLegalizeOps())
4971 return SDValue();
4972
4973 // $dst = srl (and $src, Mask), Shamt
4974 // =>
4975 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4976 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4977 //
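 // Illustrative example: (srl (and $src, 0xff00), 8) has MaskIdx = 8,
 // MaskLen = 8 and Shamt = 8, so it becomes BSTRPICK $dst, $src, 15, 8.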
4978
4979 SDValue FirstOperand = N->getOperand(0);
4980 ConstantSDNode *CN;
4981 EVT ValTy = N->getValueType(0);
4982 SDLoc DL(N);
4983 MVT GRLenVT = Subtarget.getGRLenVT();
4984 unsigned MaskIdx, MaskLen;
4985 uint64_t Shamt;
4986
4987 // The first operand must be an AND and the second operand of the AND must be
4988 // a shifted mask.
4989 if (FirstOperand.getOpcode() != ISD::AND ||
4990 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4991 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4992 return SDValue();
4993
4994 // The second operand (shift amount) must be an immediate.
4995 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4996 return SDValue();
4997
4998 Shamt = CN->getZExtValue();
4999 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5000 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5001 FirstOperand->getOperand(0),
5002 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5003 DAG.getConstant(Shamt, DL, GRLenVT));
5004
5005 return SDValue();
5006}
5007
5008// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5009// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5010static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5011 unsigned Depth) {
5012 // Limit recursion.
5013 if (Depth >= SelectionDAG::MaxRecursionDepth)
5014 return false;
5015 switch (Src.getOpcode()) {
5016 case ISD::SETCC:
5017 case ISD::TRUNCATE:
5018 return Src.getOperand(0).getValueSizeInBits() == Size;
5019 case ISD::FREEZE:
5020 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5021 case ISD::AND:
5022 case ISD::XOR:
5023 case ISD::OR:
5024 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5025 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5026 case ISD::SELECT:
5027 case ISD::VSELECT:
5028 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5029 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5030 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5031 case ISD::BUILD_VECTOR:
5032 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5033 ISD::isBuildVectorAllOnes(Src.getNode());
5034 }
5035 return false;
5036}
5037
5038// Helper to push sign extension of vXi1 SETCC result through bitops.
5039static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5040 SDValue Src, const SDLoc &DL) {
5041 switch (Src.getOpcode()) {
5042 case ISD::SETCC:
5043 case ISD::FREEZE:
5044 case ISD::TRUNCATE:
5045 case ISD::BUILD_VECTOR:
5046 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5047 case ISD::AND:
5048 case ISD::XOR:
5049 case ISD::OR:
5050 return DAG.getNode(
5051 Src.getOpcode(), DL, SExtVT,
5052 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5053 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5054 case ISD::SELECT:
5055 case ISD::VSELECT:
5056 return DAG.getSelect(
5057 DL, SExtVT, Src.getOperand(0),
5058 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5059 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5060 }
5061 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5062}
5063
5064static SDValue
5065performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5066 TargetLowering::DAGCombinerInfo &DCI,
5067 const LoongArchSubtarget &Subtarget) {
5068 SDLoc DL(N);
5069 EVT VT = N->getValueType(0);
5070 SDValue Src = N->getOperand(0);
5071 EVT SrcVT = Src.getValueType();
5072
5073 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5074 return SDValue();
5075
5076 bool UseLASX;
5077 unsigned Opc = ISD::DELETED_NODE;
5078 EVT CmpVT = Src.getOperand(0).getValueType();
5079 EVT EltVT = CmpVT.getVectorElementType();
5080
5081 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5082 UseLASX = false;
5083 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5084 CmpVT.getSizeInBits() == 256)
5085 UseLASX = true;
5086 else
5087 return SDValue();
5088
5089 SDValue SrcN1 = Src.getOperand(1);
5090 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5091 default:
5092 break;
5093 case ISD::SETEQ:
5094 // x == 0 => not (vmsknez.b x)
5095 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5096 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5097 break;
5098 case ISD::SETGT:
5099 // x > -1 => vmskgez.b x
5100 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5101 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5102 break;
5103 case ISD::SETGE:
5104 // x >= 0 => vmskgez.b x
5105 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5106 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5107 break;
5108 case ISD::SETLT:
5109 // x < 0 => vmskltz.{b,h,w,d} x
5110 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5111 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5112 EltVT == MVT::i64))
5113 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5114 break;
5115 case ISD::SETLE:
5116 // x <= -1 => vmskltz.{b,h,w,d} x
5117 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5118 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5119 EltVT == MVT::i64))
5120 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5121 break;
5122 case ISD::SETNE:
5123 // x != 0 => vmsknez.b x
5124 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5125 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5126 break;
5127 }
5128
5129 if (Opc == ISD::DELETED_NODE)
5130 return SDValue();
5131
5132 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
5133 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5134 V = DAG.getZExtOrTrunc(V, DL, T);
5135 return DAG.getBitcast(VT, V);
5136}
5137
5138static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5139 TargetLowering::DAGCombinerInfo &DCI,
5140 const LoongArchSubtarget &Subtarget) {
5141 SDLoc DL(N);
5142 EVT VT = N->getValueType(0);
5143 SDValue Src = N->getOperand(0);
5144 EVT SrcVT = Src.getValueType();
5145
5146 if (!DCI.isBeforeLegalizeOps())
5147 return SDValue();
5148
5149 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5150 return SDValue();
5151
5152 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5153 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5154 if (Res)
5155 return Res;
5156
5157 // Generate vXi1 using [X]VMSKLTZ
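 // Rough sketch of the flow below (illustrative): e.g. a v4i1 compare result
 // is sign-extended to v4i32, [X]VMSKLTZ collects the four sign bits into the
 // low bits of a GPR, and the value is truncated to i4 before the final
 // bitcast.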
5158 MVT SExtVT;
5159 unsigned Opc;
5160 bool UseLASX = false;
5161 bool PropagateSExt = false;
5162
5163 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5164 EVT CmpVT = Src.getOperand(0).getValueType();
5165 if (CmpVT.getSizeInBits() > 256)
5166 return SDValue();
5167 }
5168
5169 switch (SrcVT.getSimpleVT().SimpleTy) {
5170 default:
5171 return SDValue();
5172 case MVT::v2i1:
5173 SExtVT = MVT::v2i64;
5174 break;
5175 case MVT::v4i1:
5176 SExtVT = MVT::v4i32;
5177 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5178 SExtVT = MVT::v4i64;
5179 UseLASX = true;
5180 PropagateSExt = true;
5181 }
5182 break;
5183 case MVT::v8i1:
5184 SExtVT = MVT::v8i16;
5185 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5186 SExtVT = MVT::v8i32;
5187 UseLASX = true;
5188 PropagateSExt = true;
5189 }
5190 break;
5191 case MVT::v16i1:
5192 SExtVT = MVT::v16i8;
5193 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5194 SExtVT = MVT::v16i16;
5195 UseLASX = true;
5196 PropagateSExt = true;
5197 }
5198 break;
5199 case MVT::v32i1:
5200 SExtVT = MVT::v32i8;
5201 UseLASX = true;
5202 break;
5203 };
5204 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5205 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5206
5207 SDValue V;
5208 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5209 if (Src.getSimpleValueType() == MVT::v32i8) {
5210 SDValue Lo, Hi;
5211 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5212 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
5213 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
5214 Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
5215 DAG.getConstant(16, DL, MVT::i8));
5216 V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
5217 } else if (UseLASX) {
5218 return SDValue();
5219 }
5220 }
5221
5222 if (!V) {
5223 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5224 V = DAG.getNode(Opc, DL, MVT::i64, Src);
5225 }
5226
5227 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5228 V = DAG.getZExtOrTrunc(V, DL, T);
5229 return DAG.getBitcast(VT, V);
5230}
5231
5232static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5233 TargetLowering::DAGCombinerInfo &DCI,
5234 const LoongArchSubtarget &Subtarget) {
5235 MVT GRLenVT = Subtarget.getGRLenVT();
5236 EVT ValTy = N->getValueType(0);
5237 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5238 ConstantSDNode *CN0, *CN1;
5239 SDLoc DL(N);
5240 unsigned ValBits = ValTy.getSizeInBits();
5241 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5242 unsigned Shamt;
5243 bool SwapAndRetried = false;
5244
5245 // BSTRPICK requires the 32S feature.
5246 if (!Subtarget.has32S())
5247 return SDValue();
5248
5249 if (DCI.isBeforeLegalizeOps())
5250 return SDValue();
5251
5252 if (ValBits != 32 && ValBits != 64)
5253 return SDValue();
5254
5255Retry:
5256 // 1st pattern to match BSTRINS:
5257 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5258 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5259 // =>
5260 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
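 // Illustrative example (32-bit, size = 8, lsb = 8, so mask1 = 0xff00):
 //   or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00) => BSTRINS X, Y, 15, 8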
5261 if (N0.getOpcode() == ISD::AND &&
5262 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5263 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5264 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5265 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5266 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5267 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5268 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5269 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5270 (MaskIdx0 + MaskLen0 <= ValBits)) {
5271 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5272 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5273 N1.getOperand(0).getOperand(0),
5274 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5275 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5276 }
5277
5278 // 2nd pattern to match BSTRINS:
5279 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5280 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5281 // =>
5282 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5283 if (N0.getOpcode() == ISD::AND &&
5284 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5285 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5286 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5287 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5288 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5289 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5290 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5291 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5292 (MaskIdx0 + MaskLen0 <= ValBits)) {
5293 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5294 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5295 N1.getOperand(0).getOperand(0),
5296 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5297 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5298 }
5299
5300 // 3rd pattern to match BSTRINS:
5301 // R = or (and X, mask0), (and Y, mask1)
5302 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5303 // =>
5304 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5305 // where msb = lsb + size - 1
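 // Illustrative example (32-bit, lsb = 8, size = 8, mask1 = 0xff00):
 //   or (and X, 0xffff00ff), (and Y, 0xff00)
 //     => BSTRINS X, ((and Y, 0xff00) >> 8), 15, 8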
5306 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5307 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5308 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5309 (MaskIdx0 + MaskLen0 <= 64) &&
5310 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5311 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5312 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5313 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5314 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5315 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5316 DAG.getConstant(ValBits == 32
5317 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5318 : (MaskIdx0 + MaskLen0 - 1),
5319 DL, GRLenVT),
5320 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5321 }
5322
5323 // 4th pattern to match BSTRINS:
5324 // R = or (and X, mask), (shl Y, shamt)
5325 // where mask = (2**shamt - 1)
5326 // =>
5327 // R = BSTRINS X, Y, ValBits - 1, shamt
5328 // where ValBits = 32 or 64
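 // Illustrative example (32-bit, shamt = 8, mask = 0xff):
 //   or (and X, 0xff), (shl Y, 8) => BSTRINS X, Y, 31, 8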
5329 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5330 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5331 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5332 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5333 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5334 (MaskIdx0 + MaskLen0 <= ValBits)) {
5335 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5336 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5337 N1.getOperand(0),
5338 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5339 DAG.getConstant(Shamt, DL, GRLenVT));
5340 }
5341
5342 // 5th pattern to match BSTRINS:
5343 // R = or (and X, mask), const
5344 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5345 // =>
5346 // R = BSTRINS X, (const >> lsb), msb, lsb
5347 // where msb = lsb + size - 1
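 // Illustrative example (32-bit, lsb = 8, size = 8, const = 0x1200):
 //   or (and X, 0xffff00ff), 0x1200 => BSTRINS X, 0x12, 15, 8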
5348 if (N0.getOpcode() == ISD::AND &&
5349 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5350 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5351 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5352 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5353 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5354 return DAG.getNode(
5355 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5356 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5357 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5358 : (MaskIdx0 + MaskLen0 - 1),
5359 DL, GRLenVT),
5360 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5361 }
5362
5363 // 6th pattern.
5364 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5365 // by the incoming bits are known to be zero.
5366 // =>
5367 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5368 //
5369 // Note that the 1st pattern is a special case of the 6th, and the 6th
5370 // pattern is more common than the 1st. So we put the 1st before the 6th in
5371 // order to match as many nodes as possible.
5372 ConstantSDNode *CNMask, *CNShamt;
5373 unsigned MaskIdx, MaskLen;
5374 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5375 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5376 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5377 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5378 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5379 Shamt = CNShamt->getZExtValue();
5380 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5381 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5382 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5383 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5384 N1.getOperand(0).getOperand(0),
5385 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5386 DAG.getConstant(Shamt, DL, GRLenVT));
5387 }
5388 }
5389
5390 // 7th pattern.
5391 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5392 // overwritten by the incoming bits are known to be zero.
5393 // =>
5394 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5395 //
5396 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5397 // before the 7th in order to match as many nodes as possible.
5398 if (N1.getOpcode() == ISD::AND &&
5399 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5400 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5401 N1.getOperand(0).getOpcode() == ISD::SHL &&
5402 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5403 CNShamt->getZExtValue() == MaskIdx) {
5404 APInt ShMask(ValBits, CNMask->getZExtValue());
5405 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5406 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5407 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5408 N1.getOperand(0).getOperand(0),
5409 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5410 DAG.getConstant(MaskIdx, DL, GRLenVT));
5411 }
5412 }
5413
5414 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5415 if (!SwapAndRetried) {
5416 std::swap(N0, N1);
5417 SwapAndRetried = true;
5418 goto Retry;
5419 }
5420
5421 SwapAndRetried = false;
5422Retry2:
5423 // 8th pattern.
5424 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5425 // the incoming bits are known to be zero.
5426 // =>
5427 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5428 //
5429 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5430 // we put it here in order to match as many nodes as possible or generate fewer
5431 // instructions.
5432 if (N1.getOpcode() == ISD::AND &&
5433 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5434 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5435 APInt ShMask(ValBits, CNMask->getZExtValue());
5436 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5437 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5438 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5439 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5440 N1->getOperand(0),
5441 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5442 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5443 DAG.getConstant(MaskIdx, DL, GRLenVT));
5444 }
5445 }
5446 // Swap N0/N1 and retry.
5447 if (!SwapAndRetried) {
5448 std::swap(N0, N1);
5449 SwapAndRetried = true;
5450 goto Retry2;
5451 }
5452
5453 return SDValue();
5454}
5455
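// Returns true when V is known to carry at most an i8/i16-wide payload (a
// narrow load, or an AssertSext/AssertZext to i8/i16), recording in ExtType
// how that narrow value was extended.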
5456static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5457 ExtType = ISD::NON_EXTLOAD;
5458
5459 switch (V.getNode()->getOpcode()) {
5460 case ISD::LOAD: {
5461 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5462 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5463 (LoadNode->getMemoryVT() == MVT::i16)) {
5464 ExtType = LoadNode->getExtensionType();
5465 return true;
5466 }
5467 return false;
5468 }
5469 case ISD::AssertSext: {
5470 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5471 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5472 ExtType = ISD::SEXTLOAD;
5473 return true;
5474 }
5475 return false;
5476 }
5477 case ISD::AssertZext: {
5478 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5479 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5480 ExtType = ISD::ZEXTLOAD;
5481 return true;
5482 }
5483 return false;
5484 }
5485 default:
5486 return false;
5487 }
5488
5489 return false;
5490}
5491
5492// Eliminate redundant truncation and zero-extension nodes.
5493// * Case 1:
5494// +------------+ +------------+ +------------+
5495// | Input1 | | Input2 | | CC |
5496// +------------+ +------------+ +------------+
5497// | | |
5498// V V +----+
5499// +------------+ +------------+ |
5500// | TRUNCATE | | TRUNCATE | |
5501// +------------+ +------------+ |
5502// | | |
5503// V V |
5504// +------------+ +------------+ |
5505// | ZERO_EXT | | ZERO_EXT | |
5506// +------------+ +------------+ |
5507// | | |
5508// | +-------------+ |
5509// V V | |
5510// +----------------+ | |
5511// | AND | | |
5512// +----------------+ | |
5513// | | |
5514// +---------------+ | |
5515// | | |
5516// V V V
5517// +-------------+
5518// | CMP |
5519// +-------------+
5520// * Case 2:
5521// +------------+ +------------+ +-------------+ +------------+ +------------+
5522// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5523// +------------+ +------------+ +-------------+ +------------+ +------------+
5524// | | | | |
5525// V | | | |
5526// +------------+ | | | |
5527// | XOR |<---------------------+ | |
5528// +------------+ | | |
5529// | | | |
5530// V V +---------------+ |
5531// +------------+ +------------+ | |
5532// | TRUNCATE | | TRUNCATE | | +-------------------------+
5533// +------------+ +------------+ | |
5534// | | | |
5535// V V | |
5536// +------------+ +------------+ | |
5537// | ZERO_EXT | | ZERO_EXT | | |
5538// +------------+ +------------+ | |
5539// | | | |
5540// V V | |
5541// +----------------+ | |
5542// | AND | | |
5543// +----------------+ | |
5544// | | |
5545// +---------------+ | |
5546// | | |
5547// V V V
5548// +-------------+
5549// | CMP |
5550// +-------------+
5551static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5552 TargetLowering::DAGCombinerInfo &DCI,
5553 const LoongArchSubtarget &Subtarget) {
5554 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5555
5556 SDNode *AndNode = N->getOperand(0).getNode();
5557 if (AndNode->getOpcode() != ISD::AND)
5558 return SDValue();
5559
5560 SDValue AndInputValue2 = AndNode->getOperand(1);
5561 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5562 return SDValue();
5563
5564 SDValue CmpInputValue = N->getOperand(1);
5565 SDValue AndInputValue1 = AndNode->getOperand(0);
5566 if (AndInputValue1.getOpcode() == ISD::XOR) {
5567 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5568 return SDValue();
5569 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5570 if (!CN || CN->getSExtValue() != -1)
5571 return SDValue();
5572 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5573 if (!CN || CN->getSExtValue() != 0)
5574 return SDValue();
5575 AndInputValue1 = AndInputValue1.getOperand(0);
5576 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5577 return SDValue();
5578 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5579 if (AndInputValue2 != CmpInputValue)
5580 return SDValue();
5581 } else {
5582 return SDValue();
5583 }
5584
5585 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5586 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5587 return SDValue();
5588
5589 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5590 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5591 return SDValue();
5592
5593 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5594 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5595 ISD::LoadExtType ExtType1;
5596 ISD::LoadExtType ExtType2;
5597
5598 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5599 !checkValueWidth(TruncInputValue2, ExtType2))
5600 return SDValue();
5601
5602 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5603 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5604 return SDValue();
5605
5606 if ((ExtType2 != ISD::ZEXTLOAD) &&
5607 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5608 return SDValue();
5609
5610 // These truncation and zero-extension nodes are not necessary; remove them.
5611 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5612 TruncInputValue1, TruncInputValue2);
5613 SDValue NewSetCC =
5614 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5615 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5616 return SDValue(N, 0);
5617}
5618
5619// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5620static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5621 TargetLowering::DAGCombinerInfo &DCI,
5622 const LoongArchSubtarget &Subtarget) {
5623 if (DCI.isBeforeLegalizeOps())
5624 return SDValue();
5625
5626 SDValue Src = N->getOperand(0);
5627 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5628 return SDValue();
5629
5630 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5631 Src.getOperand(0));
5632}
5633
5634// Perform common combines for BR_CC and SELECT_CC conditions.
5635static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5636 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5637 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5638
5639 // Since an arithmetic right shift always preserves the sign bit,
5640 // the shift can be omitted.
5641 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5642 // setge (sra X, N), 0 -> setge X, 0
5643 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5644 LHS.getOpcode() == ISD::SRA) {
5645 LHS = LHS.getOperand(0);
5646 return true;
5647 }
5648
5649 if (!ISD::isIntEqualitySetCC(CCVal))
5650 return false;
5651
5652 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5653 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5654 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5655 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5656 // If we're looking for eq 0 instead of ne 0, we need to invert the
5657 // condition.
5658 bool Invert = CCVal == ISD::SETEQ;
5659 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5660 if (Invert)
5661 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5662
5663 RHS = LHS.getOperand(1);
5664 LHS = LHS.getOperand(0);
5665 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5666
5667 CC = DAG.getCondCode(CCVal);
5668 return true;
5669 }
5670
5671 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
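 // Illustrative example (GRLen = 64, C = 3): ((srl (and X, 8), 3), 0, ne)
 // becomes ((shl X, 60), 0, lt), i.e. bit 3 is tested via the sign bit.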
5672 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5673 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5674 SDValue LHS0 = LHS.getOperand(0);
5675 if (LHS0.getOpcode() == ISD::AND &&
5676 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5677 uint64_t Mask = LHS0.getConstantOperandVal(1);
5678 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5679 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5680 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5681 CC = DAG.getCondCode(CCVal);
5682
5683 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5684 LHS = LHS0.getOperand(0);
5685 if (ShAmt != 0)
5686 LHS =
5687 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5688 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5689 return true;
5690 }
5691 }
5692 }
5693
5694 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5695 // This can occur when legalizing some floating point comparisons.
5696 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5697 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5698 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5699 CC = DAG.getCondCode(CCVal);
5700 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5701 return true;
5702 }
5703
5704 return false;
5705}
5706
5707static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5708 TargetLowering::DAGCombinerInfo &DCI,
5709 const LoongArchSubtarget &Subtarget) {
5710 SDValue LHS = N->getOperand(1);
5711 SDValue RHS = N->getOperand(2);
5712 SDValue CC = N->getOperand(3);
5713 SDLoc DL(N);
5714
5715 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5716 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5717 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5718
5719 return SDValue();
5720}
5721
5722static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5723 TargetLowering::DAGCombinerInfo &DCI,
5724 const LoongArchSubtarget &Subtarget) {
5725 // Transform
5726 SDValue LHS = N->getOperand(0);
5727 SDValue RHS = N->getOperand(1);
5728 SDValue CC = N->getOperand(2);
5729 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5730 SDValue TrueV = N->getOperand(3);
5731 SDValue FalseV = N->getOperand(4);
5732 SDLoc DL(N);
5733 EVT VT = N->getValueType(0);
5734
5735 // If the True and False values are the same, we don't need a select_cc.
5736 if (TrueV == FalseV)
5737 return TrueV;
5738
5739 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5740 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
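 // Illustrative example (GRLen = 64, y = 5, z = 3): x >> 63 is -1 when x < 0
 // and 0 otherwise, so ((x >> 63) & 2) + 3 yields 5 or 3 respectively.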
5741 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5742 isNullConstant(RHS) &&
5743 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5744 if (CCVal == ISD::CondCode::SETGE)
5745 std::swap(TrueV, FalseV);
5746
5747 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5748 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5750 // Only handle simm12; if the value is not in this range, it is better
5751 // treated as a register operand.
5751 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5752 isInt<12>(TrueSImm - FalseSImm)) {
5753 SDValue SRA =
5754 DAG.getNode(ISD::SRA, DL, VT, LHS,
5755 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5756 SDValue AND =
5757 DAG.getNode(ISD::AND, DL, VT, SRA,
5758 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5759 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5760 }
5761
5762 if (CCVal == ISD::CondCode::SETGE)
5763 std::swap(TrueV, FalseV);
5764 }
5765
5766 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5767 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5768 {LHS, RHS, CC, TrueV, FalseV});
5769
5770 return SDValue();
5771}
5772
5773template <unsigned N>
5774static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5775 SelectionDAG &DAG,
5776 const LoongArchSubtarget &Subtarget,
5777 bool IsSigned = false) {
5778 SDLoc DL(Node);
5779 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5780 // Check the ImmArg.
5781 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5782 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5783 DAG.getContext()->emitError(Node->getOperationName(0) +
5784 ": argument out of range.");
5785 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5786 }
5787 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5788}
5789
5790template <unsigned N>
5791static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5792 SelectionDAG &DAG, bool IsSigned = false) {
5793 SDLoc DL(Node);
5794 EVT ResTy = Node->getValueType(0);
5795 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5796
5797 // Check the ImmArg.
5798 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5799 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5800 DAG.getContext()->emitError(Node->getOperationName(0) +
5801 ": argument out of range.");
5802 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5803 }
5804 return DAG.getConstant(
5805 APInt(ResTy.getScalarType().getSizeInBits(),
5806 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5807 DL, ResTy);
5808}
5809
5810static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5811 SDLoc DL(Node);
5812 EVT ResTy = Node->getValueType(0);
5813 SDValue Vec = Node->getOperand(2);
5814 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5815 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5816}
5817
5818static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5819 SDLoc DL(Node);
5820 EVT ResTy = Node->getValueType(0);
5821 SDValue One = DAG.getConstant(1, DL, ResTy);
5822 SDValue Bit =
5823 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5824
5825 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5826 DAG.getNOT(DL, Bit, ResTy));
5827}
5828
5829template <unsigned N>
5830static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5831 SDLoc DL(Node);
5832 EVT ResTy = Node->getValueType(0);
5833 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5834 // Check the unsigned ImmArg.
5835 if (!isUInt<N>(CImm->getZExtValue())) {
5836 DAG.getContext()->emitError(Node->getOperationName(0) +
5837 ": argument out of range.");
5838 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5839 }
5840
5841 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5842 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5843
5844 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5845}
5846
5847template <unsigned N>
5848static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5849 SDLoc DL(Node);
5850 EVT ResTy = Node->getValueType(0);
5851 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5852 // Check the unsigned ImmArg.
5853 if (!isUInt<N>(CImm->getZExtValue())) {
5854 DAG.getContext()->emitError(Node->getOperationName(0) +
5855 ": argument out of range.");
5856 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5857 }
5858
5859 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5860 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5861 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5862}
5863
5864template <unsigned N>
5865static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5866 SDLoc DL(Node);
5867 EVT ResTy = Node->getValueType(0);
5868 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5869 // Check the unsigned ImmArg.
5870 if (!isUInt<N>(CImm->getZExtValue())) {
5871 DAG.getContext()->emitError(Node->getOperationName(0) +
5872 ": argument out of range.");
5873 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5874 }
5875
5876 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5877 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5878 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5879}
5880
5881static SDValue
5882performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5883 TargetLowering::DAGCombinerInfo &DCI,
5884 const LoongArchSubtarget &Subtarget) {
5885 SDLoc DL(N);
5886 switch (N->getConstantOperandVal(0)) {
5887 default:
5888 break;
5889 case Intrinsic::loongarch_lsx_vadd_b:
5890 case Intrinsic::loongarch_lsx_vadd_h:
5891 case Intrinsic::loongarch_lsx_vadd_w:
5892 case Intrinsic::loongarch_lsx_vadd_d:
5893 case Intrinsic::loongarch_lasx_xvadd_b:
5894 case Intrinsic::loongarch_lasx_xvadd_h:
5895 case Intrinsic::loongarch_lasx_xvadd_w:
5896 case Intrinsic::loongarch_lasx_xvadd_d:
5897 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5898 N->getOperand(2));
5899 case Intrinsic::loongarch_lsx_vaddi_bu:
5900 case Intrinsic::loongarch_lsx_vaddi_hu:
5901 case Intrinsic::loongarch_lsx_vaddi_wu:
5902 case Intrinsic::loongarch_lsx_vaddi_du:
5903 case Intrinsic::loongarch_lasx_xvaddi_bu:
5904 case Intrinsic::loongarch_lasx_xvaddi_hu:
5905 case Intrinsic::loongarch_lasx_xvaddi_wu:
5906 case Intrinsic::loongarch_lasx_xvaddi_du:
5907 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5908 lowerVectorSplatImm<5>(N, 2, DAG));
5909 case Intrinsic::loongarch_lsx_vsub_b:
5910 case Intrinsic::loongarch_lsx_vsub_h:
5911 case Intrinsic::loongarch_lsx_vsub_w:
5912 case Intrinsic::loongarch_lsx_vsub_d:
5913 case Intrinsic::loongarch_lasx_xvsub_b:
5914 case Intrinsic::loongarch_lasx_xvsub_h:
5915 case Intrinsic::loongarch_lasx_xvsub_w:
5916 case Intrinsic::loongarch_lasx_xvsub_d:
5917 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5918 N->getOperand(2));
5919 case Intrinsic::loongarch_lsx_vsubi_bu:
5920 case Intrinsic::loongarch_lsx_vsubi_hu:
5921 case Intrinsic::loongarch_lsx_vsubi_wu:
5922 case Intrinsic::loongarch_lsx_vsubi_du:
5923 case Intrinsic::loongarch_lasx_xvsubi_bu:
5924 case Intrinsic::loongarch_lasx_xvsubi_hu:
5925 case Intrinsic::loongarch_lasx_xvsubi_wu:
5926 case Intrinsic::loongarch_lasx_xvsubi_du:
5927 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5928 lowerVectorSplatImm<5>(N, 2, DAG));
5929 case Intrinsic::loongarch_lsx_vneg_b:
5930 case Intrinsic::loongarch_lsx_vneg_h:
5931 case Intrinsic::loongarch_lsx_vneg_w:
5932 case Intrinsic::loongarch_lsx_vneg_d:
5933 case Intrinsic::loongarch_lasx_xvneg_b:
5934 case Intrinsic::loongarch_lasx_xvneg_h:
5935 case Intrinsic::loongarch_lasx_xvneg_w:
5936 case Intrinsic::loongarch_lasx_xvneg_d:
5937 return DAG.getNode(
5938 ISD::SUB, DL, N->getValueType(0),
5939 DAG.getConstant(
5940 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5941 /*isSigned=*/true),
5942 SDLoc(N), N->getValueType(0)),
5943 N->getOperand(1));
5944 case Intrinsic::loongarch_lsx_vmax_b:
5945 case Intrinsic::loongarch_lsx_vmax_h:
5946 case Intrinsic::loongarch_lsx_vmax_w:
5947 case Intrinsic::loongarch_lsx_vmax_d:
5948 case Intrinsic::loongarch_lasx_xvmax_b:
5949 case Intrinsic::loongarch_lasx_xvmax_h:
5950 case Intrinsic::loongarch_lasx_xvmax_w:
5951 case Intrinsic::loongarch_lasx_xvmax_d:
5952 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5953 N->getOperand(2));
5954 case Intrinsic::loongarch_lsx_vmax_bu:
5955 case Intrinsic::loongarch_lsx_vmax_hu:
5956 case Intrinsic::loongarch_lsx_vmax_wu:
5957 case Intrinsic::loongarch_lsx_vmax_du:
5958 case Intrinsic::loongarch_lasx_xvmax_bu:
5959 case Intrinsic::loongarch_lasx_xvmax_hu:
5960 case Intrinsic::loongarch_lasx_xvmax_wu:
5961 case Intrinsic::loongarch_lasx_xvmax_du:
5962 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5963 N->getOperand(2));
5964 case Intrinsic::loongarch_lsx_vmaxi_b:
5965 case Intrinsic::loongarch_lsx_vmaxi_h:
5966 case Intrinsic::loongarch_lsx_vmaxi_w:
5967 case Intrinsic::loongarch_lsx_vmaxi_d:
5968 case Intrinsic::loongarch_lasx_xvmaxi_b:
5969 case Intrinsic::loongarch_lasx_xvmaxi_h:
5970 case Intrinsic::loongarch_lasx_xvmaxi_w:
5971 case Intrinsic::loongarch_lasx_xvmaxi_d:
5972 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5973 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5974 case Intrinsic::loongarch_lsx_vmaxi_bu:
5975 case Intrinsic::loongarch_lsx_vmaxi_hu:
5976 case Intrinsic::loongarch_lsx_vmaxi_wu:
5977 case Intrinsic::loongarch_lsx_vmaxi_du:
5978 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5979 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5980 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5981 case Intrinsic::loongarch_lasx_xvmaxi_du:
5982 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5983 lowerVectorSplatImm<5>(N, 2, DAG));
5984 case Intrinsic::loongarch_lsx_vmin_b:
5985 case Intrinsic::loongarch_lsx_vmin_h:
5986 case Intrinsic::loongarch_lsx_vmin_w:
5987 case Intrinsic::loongarch_lsx_vmin_d:
5988 case Intrinsic::loongarch_lasx_xvmin_b:
5989 case Intrinsic::loongarch_lasx_xvmin_h:
5990 case Intrinsic::loongarch_lasx_xvmin_w:
5991 case Intrinsic::loongarch_lasx_xvmin_d:
5992 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5993 N->getOperand(2));
5994 case Intrinsic::loongarch_lsx_vmin_bu:
5995 case Intrinsic::loongarch_lsx_vmin_hu:
5996 case Intrinsic::loongarch_lsx_vmin_wu:
5997 case Intrinsic::loongarch_lsx_vmin_du:
5998 case Intrinsic::loongarch_lasx_xvmin_bu:
5999 case Intrinsic::loongarch_lasx_xvmin_hu:
6000 case Intrinsic::loongarch_lasx_xvmin_wu:
6001 case Intrinsic::loongarch_lasx_xvmin_du:
6002 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6003 N->getOperand(2));
6004 case Intrinsic::loongarch_lsx_vmini_b:
6005 case Intrinsic::loongarch_lsx_vmini_h:
6006 case Intrinsic::loongarch_lsx_vmini_w:
6007 case Intrinsic::loongarch_lsx_vmini_d:
6008 case Intrinsic::loongarch_lasx_xvmini_b:
6009 case Intrinsic::loongarch_lasx_xvmini_h:
6010 case Intrinsic::loongarch_lasx_xvmini_w:
6011 case Intrinsic::loongarch_lasx_xvmini_d:
6012 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6013 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6014 case Intrinsic::loongarch_lsx_vmini_bu:
6015 case Intrinsic::loongarch_lsx_vmini_hu:
6016 case Intrinsic::loongarch_lsx_vmini_wu:
6017 case Intrinsic::loongarch_lsx_vmini_du:
6018 case Intrinsic::loongarch_lasx_xvmini_bu:
6019 case Intrinsic::loongarch_lasx_xvmini_hu:
6020 case Intrinsic::loongarch_lasx_xvmini_wu:
6021 case Intrinsic::loongarch_lasx_xvmini_du:
6022 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6023 lowerVectorSplatImm<5>(N, 2, DAG));
6024 case Intrinsic::loongarch_lsx_vmul_b:
6025 case Intrinsic::loongarch_lsx_vmul_h:
6026 case Intrinsic::loongarch_lsx_vmul_w:
6027 case Intrinsic::loongarch_lsx_vmul_d:
6028 case Intrinsic::loongarch_lasx_xvmul_b:
6029 case Intrinsic::loongarch_lasx_xvmul_h:
6030 case Intrinsic::loongarch_lasx_xvmul_w:
6031 case Intrinsic::loongarch_lasx_xvmul_d:
6032 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6033 N->getOperand(2));
6034 case Intrinsic::loongarch_lsx_vmadd_b:
6035 case Intrinsic::loongarch_lsx_vmadd_h:
6036 case Intrinsic::loongarch_lsx_vmadd_w:
6037 case Intrinsic::loongarch_lsx_vmadd_d:
6038 case Intrinsic::loongarch_lasx_xvmadd_b:
6039 case Intrinsic::loongarch_lasx_xvmadd_h:
6040 case Intrinsic::loongarch_lasx_xvmadd_w:
6041 case Intrinsic::loongarch_lasx_xvmadd_d: {
6042 EVT ResTy = N->getValueType(0);
6043 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6044 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6045 N->getOperand(3)));
6046 }
6047 case Intrinsic::loongarch_lsx_vmsub_b:
6048 case Intrinsic::loongarch_lsx_vmsub_h:
6049 case Intrinsic::loongarch_lsx_vmsub_w:
6050 case Intrinsic::loongarch_lsx_vmsub_d:
6051 case Intrinsic::loongarch_lasx_xvmsub_b:
6052 case Intrinsic::loongarch_lasx_xvmsub_h:
6053 case Intrinsic::loongarch_lasx_xvmsub_w:
6054 case Intrinsic::loongarch_lasx_xvmsub_d: {
6055 EVT ResTy = N->getValueType(0);
6056 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6057 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6058 N->getOperand(3)));
6059 }
6060 case Intrinsic::loongarch_lsx_vdiv_b:
6061 case Intrinsic::loongarch_lsx_vdiv_h:
6062 case Intrinsic::loongarch_lsx_vdiv_w:
6063 case Intrinsic::loongarch_lsx_vdiv_d:
6064 case Intrinsic::loongarch_lasx_xvdiv_b:
6065 case Intrinsic::loongarch_lasx_xvdiv_h:
6066 case Intrinsic::loongarch_lasx_xvdiv_w:
6067 case Intrinsic::loongarch_lasx_xvdiv_d:
6068 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6069 N->getOperand(2));
6070 case Intrinsic::loongarch_lsx_vdiv_bu:
6071 case Intrinsic::loongarch_lsx_vdiv_hu:
6072 case Intrinsic::loongarch_lsx_vdiv_wu:
6073 case Intrinsic::loongarch_lsx_vdiv_du:
6074 case Intrinsic::loongarch_lasx_xvdiv_bu:
6075 case Intrinsic::loongarch_lasx_xvdiv_hu:
6076 case Intrinsic::loongarch_lasx_xvdiv_wu:
6077 case Intrinsic::loongarch_lasx_xvdiv_du:
6078 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6079 N->getOperand(2));
6080 case Intrinsic::loongarch_lsx_vmod_b:
6081 case Intrinsic::loongarch_lsx_vmod_h:
6082 case Intrinsic::loongarch_lsx_vmod_w:
6083 case Intrinsic::loongarch_lsx_vmod_d:
6084 case Intrinsic::loongarch_lasx_xvmod_b:
6085 case Intrinsic::loongarch_lasx_xvmod_h:
6086 case Intrinsic::loongarch_lasx_xvmod_w:
6087 case Intrinsic::loongarch_lasx_xvmod_d:
6088 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6089 N->getOperand(2));
6090 case Intrinsic::loongarch_lsx_vmod_bu:
6091 case Intrinsic::loongarch_lsx_vmod_hu:
6092 case Intrinsic::loongarch_lsx_vmod_wu:
6093 case Intrinsic::loongarch_lsx_vmod_du:
6094 case Intrinsic::loongarch_lasx_xvmod_bu:
6095 case Intrinsic::loongarch_lasx_xvmod_hu:
6096 case Intrinsic::loongarch_lasx_xvmod_wu:
6097 case Intrinsic::loongarch_lasx_xvmod_du:
6098 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6099 N->getOperand(2));
6100 case Intrinsic::loongarch_lsx_vand_v:
6101 case Intrinsic::loongarch_lasx_xvand_v:
6102 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6103 N->getOperand(2));
6104 case Intrinsic::loongarch_lsx_vor_v:
6105 case Intrinsic::loongarch_lasx_xvor_v:
6106 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6107 N->getOperand(2));
6108 case Intrinsic::loongarch_lsx_vxor_v:
6109 case Intrinsic::loongarch_lasx_xvxor_v:
6110 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6111 N->getOperand(2));
6112 case Intrinsic::loongarch_lsx_vnor_v:
6113 case Intrinsic::loongarch_lasx_xvnor_v: {
6114 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6115 N->getOperand(2));
6116 return DAG.getNOT(DL, Res, Res->getValueType(0));
6117 }
6118 case Intrinsic::loongarch_lsx_vandi_b:
6119 case Intrinsic::loongarch_lasx_xvandi_b:
6120 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6121 lowerVectorSplatImm<8>(N, 2, DAG));
6122 case Intrinsic::loongarch_lsx_vori_b:
6123 case Intrinsic::loongarch_lasx_xvori_b:
6124 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6125 lowerVectorSplatImm<8>(N, 2, DAG));
6126 case Intrinsic::loongarch_lsx_vxori_b:
6127 case Intrinsic::loongarch_lasx_xvxori_b:
6128 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6129 lowerVectorSplatImm<8>(N, 2, DAG));
6130 case Intrinsic::loongarch_lsx_vsll_b:
6131 case Intrinsic::loongarch_lsx_vsll_h:
6132 case Intrinsic::loongarch_lsx_vsll_w:
6133 case Intrinsic::loongarch_lsx_vsll_d:
6134 case Intrinsic::loongarch_lasx_xvsll_b:
6135 case Intrinsic::loongarch_lasx_xvsll_h:
6136 case Intrinsic::loongarch_lasx_xvsll_w:
6137 case Intrinsic::loongarch_lasx_xvsll_d:
6138 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6139 truncateVecElts(N, DAG));
6140 case Intrinsic::loongarch_lsx_vslli_b:
6141 case Intrinsic::loongarch_lasx_xvslli_b:
6142 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6143 lowerVectorSplatImm<3>(N, 2, DAG));
6144 case Intrinsic::loongarch_lsx_vslli_h:
6145 case Intrinsic::loongarch_lasx_xvslli_h:
6146 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6147 lowerVectorSplatImm<4>(N, 2, DAG));
6148 case Intrinsic::loongarch_lsx_vslli_w:
6149 case Intrinsic::loongarch_lasx_xvslli_w:
6150 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6151 lowerVectorSplatImm<5>(N, 2, DAG));
6152 case Intrinsic::loongarch_lsx_vslli_d:
6153 case Intrinsic::loongarch_lasx_xvslli_d:
6154 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6155 lowerVectorSplatImm<6>(N, 2, DAG));
6156 case Intrinsic::loongarch_lsx_vsrl_b:
6157 case Intrinsic::loongarch_lsx_vsrl_h:
6158 case Intrinsic::loongarch_lsx_vsrl_w:
6159 case Intrinsic::loongarch_lsx_vsrl_d:
6160 case Intrinsic::loongarch_lasx_xvsrl_b:
6161 case Intrinsic::loongarch_lasx_xvsrl_h:
6162 case Intrinsic::loongarch_lasx_xvsrl_w:
6163 case Intrinsic::loongarch_lasx_xvsrl_d:
6164 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6165 truncateVecElts(N, DAG));
6166 case Intrinsic::loongarch_lsx_vsrli_b:
6167 case Intrinsic::loongarch_lasx_xvsrli_b:
6168 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6169 lowerVectorSplatImm<3>(N, 2, DAG));
6170 case Intrinsic::loongarch_lsx_vsrli_h:
6171 case Intrinsic::loongarch_lasx_xvsrli_h:
6172 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6173 lowerVectorSplatImm<4>(N, 2, DAG));
6174 case Intrinsic::loongarch_lsx_vsrli_w:
6175 case Intrinsic::loongarch_lasx_xvsrli_w:
6176 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6177 lowerVectorSplatImm<5>(N, 2, DAG));
6178 case Intrinsic::loongarch_lsx_vsrli_d:
6179 case Intrinsic::loongarch_lasx_xvsrli_d:
6180 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6181 lowerVectorSplatImm<6>(N, 2, DAG));
6182 case Intrinsic::loongarch_lsx_vsra_b:
6183 case Intrinsic::loongarch_lsx_vsra_h:
6184 case Intrinsic::loongarch_lsx_vsra_w:
6185 case Intrinsic::loongarch_lsx_vsra_d:
6186 case Intrinsic::loongarch_lasx_xvsra_b:
6187 case Intrinsic::loongarch_lasx_xvsra_h:
6188 case Intrinsic::loongarch_lasx_xvsra_w:
6189 case Intrinsic::loongarch_lasx_xvsra_d:
6190 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6191 truncateVecElts(N, DAG));
6192 case Intrinsic::loongarch_lsx_vsrai_b:
6193 case Intrinsic::loongarch_lasx_xvsrai_b:
6194 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6195 lowerVectorSplatImm<3>(N, 2, DAG));
6196 case Intrinsic::loongarch_lsx_vsrai_h:
6197 case Intrinsic::loongarch_lasx_xvsrai_h:
6198 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6199 lowerVectorSplatImm<4>(N, 2, DAG));
6200 case Intrinsic::loongarch_lsx_vsrai_w:
6201 case Intrinsic::loongarch_lasx_xvsrai_w:
6202 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6203 lowerVectorSplatImm<5>(N, 2, DAG));
6204 case Intrinsic::loongarch_lsx_vsrai_d:
6205 case Intrinsic::loongarch_lasx_xvsrai_d:
6206 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6207 lowerVectorSplatImm<6>(N, 2, DAG));
6208 case Intrinsic::loongarch_lsx_vclz_b:
6209 case Intrinsic::loongarch_lsx_vclz_h:
6210 case Intrinsic::loongarch_lsx_vclz_w:
6211 case Intrinsic::loongarch_lsx_vclz_d:
6212 case Intrinsic::loongarch_lasx_xvclz_b:
6213 case Intrinsic::loongarch_lasx_xvclz_h:
6214 case Intrinsic::loongarch_lasx_xvclz_w:
6215 case Intrinsic::loongarch_lasx_xvclz_d:
6216 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6217 case Intrinsic::loongarch_lsx_vpcnt_b:
6218 case Intrinsic::loongarch_lsx_vpcnt_h:
6219 case Intrinsic::loongarch_lsx_vpcnt_w:
6220 case Intrinsic::loongarch_lsx_vpcnt_d:
6221 case Intrinsic::loongarch_lasx_xvpcnt_b:
6222 case Intrinsic::loongarch_lasx_xvpcnt_h:
6223 case Intrinsic::loongarch_lasx_xvpcnt_w:
6224 case Intrinsic::loongarch_lasx_xvpcnt_d:
6225 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6226 case Intrinsic::loongarch_lsx_vbitclr_b:
6227 case Intrinsic::loongarch_lsx_vbitclr_h:
6228 case Intrinsic::loongarch_lsx_vbitclr_w:
6229 case Intrinsic::loongarch_lsx_vbitclr_d:
6230 case Intrinsic::loongarch_lasx_xvbitclr_b:
6231 case Intrinsic::loongarch_lasx_xvbitclr_h:
6232 case Intrinsic::loongarch_lasx_xvbitclr_w:
6233 case Intrinsic::loongarch_lasx_xvbitclr_d:
6234 return lowerVectorBitClear(N, DAG);
6235 case Intrinsic::loongarch_lsx_vbitclri_b:
6236 case Intrinsic::loongarch_lasx_xvbitclri_b:
6237 return lowerVectorBitClearImm<3>(N, DAG);
6238 case Intrinsic::loongarch_lsx_vbitclri_h:
6239 case Intrinsic::loongarch_lasx_xvbitclri_h:
6240 return lowerVectorBitClearImm<4>(N, DAG);
6241 case Intrinsic::loongarch_lsx_vbitclri_w:
6242 case Intrinsic::loongarch_lasx_xvbitclri_w:
6243 return lowerVectorBitClearImm<5>(N, DAG);
6244 case Intrinsic::loongarch_lsx_vbitclri_d:
6245 case Intrinsic::loongarch_lasx_xvbitclri_d:
6246 return lowerVectorBitClearImm<6>(N, DAG);
6247 case Intrinsic::loongarch_lsx_vbitset_b:
6248 case Intrinsic::loongarch_lsx_vbitset_h:
6249 case Intrinsic::loongarch_lsx_vbitset_w:
6250 case Intrinsic::loongarch_lsx_vbitset_d:
6251 case Intrinsic::loongarch_lasx_xvbitset_b:
6252 case Intrinsic::loongarch_lasx_xvbitset_h:
6253 case Intrinsic::loongarch_lasx_xvbitset_w:
6254 case Intrinsic::loongarch_lasx_xvbitset_d: {
6255 EVT VecTy = N->getValueType(0);
6256 SDValue One = DAG.getConstant(1, DL, VecTy);
6257 return DAG.getNode(
6258 ISD::OR, DL, VecTy, N->getOperand(1),
6259 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6260 }
6261 case Intrinsic::loongarch_lsx_vbitseti_b:
6262 case Intrinsic::loongarch_lasx_xvbitseti_b:
6263 return lowerVectorBitSetImm<3>(N, DAG);
6264 case Intrinsic::loongarch_lsx_vbitseti_h:
6265 case Intrinsic::loongarch_lasx_xvbitseti_h:
6266 return lowerVectorBitSetImm<4>(N, DAG);
6267 case Intrinsic::loongarch_lsx_vbitseti_w:
6268 case Intrinsic::loongarch_lasx_xvbitseti_w:
6269 return lowerVectorBitSetImm<5>(N, DAG);
6270 case Intrinsic::loongarch_lsx_vbitseti_d:
6271 case Intrinsic::loongarch_lasx_xvbitseti_d:
6272 return lowerVectorBitSetImm<6>(N, DAG);
6273 case Intrinsic::loongarch_lsx_vbitrev_b:
6274 case Intrinsic::loongarch_lsx_vbitrev_h:
6275 case Intrinsic::loongarch_lsx_vbitrev_w:
6276 case Intrinsic::loongarch_lsx_vbitrev_d:
6277 case Intrinsic::loongarch_lasx_xvbitrev_b:
6278 case Intrinsic::loongarch_lasx_xvbitrev_h:
6279 case Intrinsic::loongarch_lasx_xvbitrev_w:
6280 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6281 EVT VecTy = N->getValueType(0);
6282 SDValue One = DAG.getConstant(1, DL, VecTy);
6283 return DAG.getNode(
6284 ISD::XOR, DL, VecTy, N->getOperand(1),
6285 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6286 }
6287 case Intrinsic::loongarch_lsx_vbitrevi_b:
6288 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6289 return lowerVectorBitRevImm<3>(N, DAG);
6290 case Intrinsic::loongarch_lsx_vbitrevi_h:
6291 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6292 return lowerVectorBitRevImm<4>(N, DAG);
6293 case Intrinsic::loongarch_lsx_vbitrevi_w:
6294 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6295 return lowerVectorBitRevImm<5>(N, DAG);
6296 case Intrinsic::loongarch_lsx_vbitrevi_d:
6297 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6298 return lowerVectorBitRevImm<6>(N, DAG);
6299 case Intrinsic::loongarch_lsx_vfadd_s:
6300 case Intrinsic::loongarch_lsx_vfadd_d:
6301 case Intrinsic::loongarch_lasx_xvfadd_s:
6302 case Intrinsic::loongarch_lasx_xvfadd_d:
6303 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6304 N->getOperand(2));
6305 case Intrinsic::loongarch_lsx_vfsub_s:
6306 case Intrinsic::loongarch_lsx_vfsub_d:
6307 case Intrinsic::loongarch_lasx_xvfsub_s:
6308 case Intrinsic::loongarch_lasx_xvfsub_d:
6309 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6310 N->getOperand(2));
6311 case Intrinsic::loongarch_lsx_vfmul_s:
6312 case Intrinsic::loongarch_lsx_vfmul_d:
6313 case Intrinsic::loongarch_lasx_xvfmul_s:
6314 case Intrinsic::loongarch_lasx_xvfmul_d:
6315 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6316 N->getOperand(2));
6317 case Intrinsic::loongarch_lsx_vfdiv_s:
6318 case Intrinsic::loongarch_lsx_vfdiv_d:
6319 case Intrinsic::loongarch_lasx_xvfdiv_s:
6320 case Intrinsic::loongarch_lasx_xvfdiv_d:
6321 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6322 N->getOperand(2));
6323 case Intrinsic::loongarch_lsx_vfmadd_s:
6324 case Intrinsic::loongarch_lsx_vfmadd_d:
6325 case Intrinsic::loongarch_lasx_xvfmadd_s:
6326 case Intrinsic::loongarch_lasx_xvfmadd_d:
6327 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6328 N->getOperand(2), N->getOperand(3));
6329 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6330 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6331 N->getOperand(1), N->getOperand(2),
6332 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6333 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6334 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6335 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6336 N->getOperand(1), N->getOperand(2),
6337 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6338 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6339 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6340 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6341 N->getOperand(1), N->getOperand(2),
6342 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6343 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6344 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6345 N->getOperand(1), N->getOperand(2),
6346 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6347 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6348 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6349 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6350 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6351 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6352 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6353 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6354 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6355 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6356 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6357 N->getOperand(1)));
6358 case Intrinsic::loongarch_lsx_vreplve_b:
6359 case Intrinsic::loongarch_lsx_vreplve_h:
6360 case Intrinsic::loongarch_lsx_vreplve_w:
6361 case Intrinsic::loongarch_lsx_vreplve_d:
6362 case Intrinsic::loongarch_lasx_xvreplve_b:
6363 case Intrinsic::loongarch_lasx_xvreplve_h:
6364 case Intrinsic::loongarch_lasx_xvreplve_w:
6365 case Intrinsic::loongarch_lasx_xvreplve_d:
6366 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6367 N->getOperand(1),
6368 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6369 N->getOperand(2)));
6370 }
6371 return SDValue();
6372}
6373
6374static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6375 TargetLowering::DAGCombinerInfo &DCI,
6376 const LoongArchSubtarget &Subtarget) {
6377 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6378 // conversion is unnecessary and can be replaced with the
6379 // MOVFR2GR_S_LA64 operand.
6380 SDValue Op0 = N->getOperand(0);
6381 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6382 return Op0.getOperand(0);
6383 return SDValue();
6384}
6385
6386static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6387 TargetLowering::DAGCombinerInfo &DCI,
6388 const LoongArchSubtarget &Subtarget) {
6389 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6390 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6391 // operand.
6392 SDValue Op0 = N->getOperand(0);
6393 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6394 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6395 "Unexpected value type!");
6396 return Op0.getOperand(0);
6397 }
6398 return SDValue();
6399}
6400
6401static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6402 TargetLowering::DAGCombinerInfo &DCI,
6403 const LoongArchSubtarget &Subtarget) {
6404 MVT VT = N->getSimpleValueType(0);
6405 unsigned NumBits = VT.getScalarSizeInBits();
6406
6407 // Simplify the inputs.
6408 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6409 APInt DemandedMask(APInt::getAllOnes(NumBits));
6410 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6411 return SDValue(N, 0);
6412
6413 return SDValue();
6414}
6415
6416static SDValue
6417performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6418 TargetLowering::DAGCombinerInfo &DCI,
6419 const LoongArchSubtarget &Subtarget) {
6420 SDValue Op0 = N->getOperand(0);
6421 SDLoc DL(N);
6422
6423 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6424 // redundant. Instead, use BuildPairF64's operands directly.
6425 if (Op0.getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6426 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6427
6428 if (Op0->isUndef()) {
6429 SDValue Lo = DAG.getUNDEF(MVT::i32);
6430 SDValue Hi = DAG.getUNDEF(MVT::i32);
6431 return DCI.CombineTo(N, Lo, Hi);
6432 }
6433
6434 // It's cheaper to materialise two 32-bit integers than to load a double
6435 // from the constant pool and transfer it to integer registers through the
6436 // stack.
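// Worked example (illustrative): the f64 constant 1.0 has the bit pattern
// 0x3FF0000000000000, so Lo becomes 0x00000000 and Hi becomes 0x3FF00000.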
6438 APInt V = C->getValueAPF().bitcastToAPInt();
6439 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6440 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6441 return DCI.CombineTo(N, Lo, Hi);
6442 }
6443
6444 return SDValue();
6445}
6446
6447static SDValue
6450 const LoongArchSubtarget &Subtarget) {
6451 if (!DCI.isBeforeLegalize())
6452 return SDValue();
6453
6454 MVT EltVT = N->getSimpleValueType(0);
6455 SDValue Vec = N->getOperand(0);
6456 EVT VecTy = Vec->getValueType(0);
6457 SDValue Idx = N->getOperand(1);
6458 unsigned IdxOp = Idx.getOpcode();
6459 SDLoc DL(N);
6460
6461 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6462 return SDValue();
6463
6464 // Combine:
6465 // t2 = truncate t1
6466 // t3 = {zero/sign/any}_extend t2
6467 // t4 = extract_vector_elt t0, t3
6468 // to:
6469 // t4 = extract_vector_elt t0, t1
6470 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6471 IdxOp == ISD::ANY_EXTEND) {
6472 SDValue IdxOrig = Idx.getOperand(0);
6473 if (IdxOrig.getOpcode() != ISD::TRUNCATE)
6474 return SDValue();
6475
6476 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6477 IdxOrig.getOperand(0));
6478 }
6479
6480 return SDValue();
6481}
6482
6484 DAGCombinerInfo &DCI) const {
6485 SelectionDAG &DAG = DCI.DAG;
6486 switch (N->getOpcode()) {
6487 default:
6488 break;
6489 case ISD::AND:
6490 return performANDCombine(N, DAG, DCI, Subtarget);
6491 case ISD::OR:
6492 return performORCombine(N, DAG, DCI, Subtarget);
6493 case ISD::SETCC:
6494 return performSETCCCombine(N, DAG, DCI, Subtarget);
6495 case ISD::SRL:
6496 return performSRLCombine(N, DAG, DCI, Subtarget);
6497 case ISD::BITCAST:
6498 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6500 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6502 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6504 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6506 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6508 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6510 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6513 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6515 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6517 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6518 }
6519 return SDValue();
6520}
6521
6524 if (!ZeroDivCheck)
6525 return MBB;
6526
6527 // Build instructions:
6528 // MBB:
6529 // div(or mod) $dst, $dividend, $divisor
6530 // bne $divisor, $zero, SinkMBB
6531 // BreakMBB:
6532 // break 7 // BRK_DIVZERO
6533 // SinkMBB:
6534 // fallthrough
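// For example (illustrative assembly, register names arbitrary), a checked
// `div.w $a0, $a0, $a1` expands to:
//   div.w $a0, $a0, $a1
//   bne   $a1, $zero, .Lsink
//   break 7
// .Lsink: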
6535 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6536 MachineFunction::iterator It = ++MBB->getIterator();
6537 MachineFunction *MF = MBB->getParent();
6538 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6539 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6540 MF->insert(It, BreakMBB);
6541 MF->insert(It, SinkMBB);
6542
6543 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6544 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6545 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6546
6547 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6548 DebugLoc DL = MI.getDebugLoc();
6549 MachineOperand &Divisor = MI.getOperand(2);
6550 Register DivisorReg = Divisor.getReg();
6551
6552 // MBB:
6553 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6554 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6555 .addReg(LoongArch::R0)
6556 .addMBB(SinkMBB);
6557 MBB->addSuccessor(BreakMBB);
6558 MBB->addSuccessor(SinkMBB);
6559
6560 // BreakMBB:
7561 // See the Linux header file arch/loongarch/include/uapi/asm/break.h for the
6562 // definition of BRK_DIVZERO.
6563 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6564 BreakMBB->addSuccessor(SinkMBB);
6565
6566 // Clear Divisor's kill flag.
6567 Divisor.setIsKill(false);
6568
6569 return SinkMBB;
6570}
6571
6572static MachineBasicBlock *
6574 const LoongArchSubtarget &Subtarget) {
6575 unsigned CondOpc;
6576 switch (MI.getOpcode()) {
6577 default:
6578 llvm_unreachable("Unexpected opcode");
6579 case LoongArch::PseudoVBZ:
6580 CondOpc = LoongArch::VSETEQZ_V;
6581 break;
6582 case LoongArch::PseudoVBZ_B:
6583 CondOpc = LoongArch::VSETANYEQZ_B;
6584 break;
6585 case LoongArch::PseudoVBZ_H:
6586 CondOpc = LoongArch::VSETANYEQZ_H;
6587 break;
6588 case LoongArch::PseudoVBZ_W:
6589 CondOpc = LoongArch::VSETANYEQZ_W;
6590 break;
6591 case LoongArch::PseudoVBZ_D:
6592 CondOpc = LoongArch::VSETANYEQZ_D;
6593 break;
6594 case LoongArch::PseudoVBNZ:
6595 CondOpc = LoongArch::VSETNEZ_V;
6596 break;
6597 case LoongArch::PseudoVBNZ_B:
6598 CondOpc = LoongArch::VSETALLNEZ_B;
6599 break;
6600 case LoongArch::PseudoVBNZ_H:
6601 CondOpc = LoongArch::VSETALLNEZ_H;
6602 break;
6603 case LoongArch::PseudoVBNZ_W:
6604 CondOpc = LoongArch::VSETALLNEZ_W;
6605 break;
6606 case LoongArch::PseudoVBNZ_D:
6607 CondOpc = LoongArch::VSETALLNEZ_D;
6608 break;
6609 case LoongArch::PseudoXVBZ:
6610 CondOpc = LoongArch::XVSETEQZ_V;
6611 break;
6612 case LoongArch::PseudoXVBZ_B:
6613 CondOpc = LoongArch::XVSETANYEQZ_B;
6614 break;
6615 case LoongArch::PseudoXVBZ_H:
6616 CondOpc = LoongArch::XVSETANYEQZ_H;
6617 break;
6618 case LoongArch::PseudoXVBZ_W:
6619 CondOpc = LoongArch::XVSETANYEQZ_W;
6620 break;
6621 case LoongArch::PseudoXVBZ_D:
6622 CondOpc = LoongArch::XVSETANYEQZ_D;
6623 break;
6624 case LoongArch::PseudoXVBNZ:
6625 CondOpc = LoongArch::XVSETNEZ_V;
6626 break;
6627 case LoongArch::PseudoXVBNZ_B:
6628 CondOpc = LoongArch::XVSETALLNEZ_B;
6629 break;
6630 case LoongArch::PseudoXVBNZ_H:
6631 CondOpc = LoongArch::XVSETALLNEZ_H;
6632 break;
6633 case LoongArch::PseudoXVBNZ_W:
6634 CondOpc = LoongArch::XVSETALLNEZ_W;
6635 break;
6636 case LoongArch::PseudoXVBNZ_D:
6637 CondOpc = LoongArch::XVSETALLNEZ_D;
6638 break;
6639 }
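// The pseudo is expanded into a diamond that materialises the branch
// condition as 0 or 1 in a GPR (sketch of the emitted code):
//   BB:      $fcc = vset*/xvset* $vr ; bcnez $fcc, TrueBB
//   FalseBB: $rd1 = addi.w $zero, 0  ; b SinkBB
//   TrueBB:  $rd2 = addi.w $zero, 1
//   SinkBB:  $dst = PHI [$rd1, FalseBB], [$rd2, TrueBB]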
6640
6641 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6642 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6643 DebugLoc DL = MI.getDebugLoc();
6646
6647 MachineFunction *F = BB->getParent();
6648 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6649 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6650 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6651
6652 F->insert(It, FalseBB);
6653 F->insert(It, TrueBB);
6654 F->insert(It, SinkBB);
6655
6656 // Transfer the remainder of MBB and its successor edges to Sink.
6657 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6659
6660 // Insert the real instruction into BB.
6661 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6662 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6663
6664 // Insert branch.
6665 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6666 BB->addSuccessor(FalseBB);
6667 BB->addSuccessor(TrueBB);
6668
6669 // FalseBB.
6670 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6671 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6672 .addReg(LoongArch::R0)
6673 .addImm(0);
6674 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6675 FalseBB->addSuccessor(SinkBB);
6676
6677 // TrueBB.
6678 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6679 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6680 .addReg(LoongArch::R0)
6681 .addImm(1);
6682 TrueBB->addSuccessor(SinkBB);
6683
6684 // SinkBB: merge the results.
6685 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6686 MI.getOperand(0).getReg())
6687 .addReg(RD1)
6688 .addMBB(FalseBB)
6689 .addReg(RD2)
6690 .addMBB(TrueBB);
6691
6692 // The pseudo instruction is gone now.
6693 MI.eraseFromParent();
6694 return SinkBB;
6695}
6696
6697static MachineBasicBlock *
6699 const LoongArchSubtarget &Subtarget) {
6700 unsigned InsOp;
6701 unsigned BroadcastOp;
6702 unsigned HalfSize;
6703 switch (MI.getOpcode()) {
6704 default:
6705 llvm_unreachable("Unexpected opcode");
6706 case LoongArch::PseudoXVINSGR2VR_B:
6707 HalfSize = 16;
6708 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6709 InsOp = LoongArch::XVEXTRINS_B;
6710 break;
6711 case LoongArch::PseudoXVINSGR2VR_H:
6712 HalfSize = 8;
6713 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6714 InsOp = LoongArch::XVEXTRINS_H;
6715 break;
6716 }
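// Two expansion strategies are used below: if the 256-bit source is
// IMPLICIT_DEF and the index lies in the low 128-bit half, the element is
// inserted into the LSX subregister and the result widened with
// SUBREG_TO_REG. Otherwise the element is broadcast with xvreplgr2vr,
// combined with the original source via xvpermi.q, and written into the
// destination with xvextrins (whose immediate encodes the destination and
// source element positions within each 128-bit half).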
6717 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6718 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6719 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6720 DebugLoc DL = MI.getDebugLoc();
6722 // XDst = vector_insert XSrc, Elt, Idx
6723 Register XDst = MI.getOperand(0).getReg();
6724 Register XSrc = MI.getOperand(1).getReg();
6725 Register Elt = MI.getOperand(2).getReg();
6726 unsigned Idx = MI.getOperand(3).getImm();
6727
6728 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6729 Idx < HalfSize) {
6730 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6731 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6732
6733 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6734 .addReg(XSrc, 0, LoongArch::sub_128);
6735 BuildMI(*BB, MI, DL,
6736 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6737 : LoongArch::VINSGR2VR_B),
6738 ScratchSubReg2)
6739 .addReg(ScratchSubReg1)
6740 .addReg(Elt)
6741 .addImm(Idx);
6742
6743 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6744 .addImm(0)
6745 .addReg(ScratchSubReg2)
6746 .addImm(LoongArch::sub_128);
6747 } else {
6748 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6749 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6750
6751 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6752
6753 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6754 .addReg(ScratchReg1)
6755 .addReg(XSrc)
6756 .addImm(Idx >= HalfSize ? 48 : 18);
6757
6758 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6759 .addReg(XSrc)
6760 .addReg(ScratchReg2)
6761 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6762 }
6763
6764 MI.eraseFromParent();
6765 return BB;
6766}
6767
6770 const LoongArchSubtarget &Subtarget) {
6771 assert(Subtarget.hasExtLSX());
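// Scalar CTPOP is expanded through LSX: the GPR operand is inserted into
// element 0 of a zero-initialised vector (vldi 0), the bits are counted with
// vpcnt.{w,d}, and element 0 of the result is moved back to a GPR with
// vpickve2gr.{w,d}.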
6772 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6773 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6774 DebugLoc DL = MI.getDebugLoc();
6776 Register Dst = MI.getOperand(0).getReg();
6777 Register Src = MI.getOperand(1).getReg();
6778 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6779 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6780 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6781
6782 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6783 BuildMI(*BB, MI, DL,
6784 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6785 : LoongArch::VINSGR2VR_W),
6786 ScratchReg2)
6787 .addReg(ScratchReg1)
6788 .addReg(Src)
6789 .addImm(0);
6790 BuildMI(
6791 *BB, MI, DL,
6792 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6793 ScratchReg3)
6794 .addReg(ScratchReg2);
6795 BuildMI(*BB, MI, DL,
6796 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6797 : LoongArch::VPICKVE2GR_W),
6798 Dst)
6799 .addReg(ScratchReg3)
6800 .addImm(0);
6801
6802 MI.eraseFromParent();
6803 return BB;
6804}
6805
6806static MachineBasicBlock *
6808 const LoongArchSubtarget &Subtarget) {
6809 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6810 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6811 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6813 Register Dst = MI.getOperand(0).getReg();
6814 Register Src = MI.getOperand(1).getReg();
6815 DebugLoc DL = MI.getDebugLoc();
6816 unsigned EleBits = 8;
6817 unsigned NotOpc = 0;
6818 unsigned MskOpc;
6819
6820 switch (MI.getOpcode()) {
6821 default:
6822 llvm_unreachable("Unexpected opcode");
6823 case LoongArch::PseudoVMSKLTZ_B:
6824 MskOpc = LoongArch::VMSKLTZ_B;
6825 break;
6826 case LoongArch::PseudoVMSKLTZ_H:
6827 MskOpc = LoongArch::VMSKLTZ_H;
6828 EleBits = 16;
6829 break;
6830 case LoongArch::PseudoVMSKLTZ_W:
6831 MskOpc = LoongArch::VMSKLTZ_W;
6832 EleBits = 32;
6833 break;
6834 case LoongArch::PseudoVMSKLTZ_D:
6835 MskOpc = LoongArch::VMSKLTZ_D;
6836 EleBits = 64;
6837 break;
6838 case LoongArch::PseudoVMSKGEZ_B:
6839 MskOpc = LoongArch::VMSKGEZ_B;
6840 break;
6841 case LoongArch::PseudoVMSKEQZ_B:
6842 MskOpc = LoongArch::VMSKNZ_B;
6843 NotOpc = LoongArch::VNOR_V;
6844 break;
6845 case LoongArch::PseudoVMSKNEZ_B:
6846 MskOpc = LoongArch::VMSKNZ_B;
6847 break;
6848 case LoongArch::PseudoXVMSKLTZ_B:
6849 MskOpc = LoongArch::XVMSKLTZ_B;
6850 RC = &LoongArch::LASX256RegClass;
6851 break;
6852 case LoongArch::PseudoXVMSKLTZ_H:
6853 MskOpc = LoongArch::XVMSKLTZ_H;
6854 RC = &LoongArch::LASX256RegClass;
6855 EleBits = 16;
6856 break;
6857 case LoongArch::PseudoXVMSKLTZ_W:
6858 MskOpc = LoongArch::XVMSKLTZ_W;
6859 RC = &LoongArch::LASX256RegClass;
6860 EleBits = 32;
6861 break;
6862 case LoongArch::PseudoXVMSKLTZ_D:
6863 MskOpc = LoongArch::XVMSKLTZ_D;
6864 RC = &LoongArch::LASX256RegClass;
6865 EleBits = 64;
6866 break;
6867 case LoongArch::PseudoXVMSKGEZ_B:
6868 MskOpc = LoongArch::XVMSKGEZ_B;
6869 RC = &LoongArch::LASX256RegClass;
6870 break;
6871 case LoongArch::PseudoXVMSKEQZ_B:
6872 MskOpc = LoongArch::XVMSKNZ_B;
6873 NotOpc = LoongArch::XVNOR_V;
6874 RC = &LoongArch::LASX256RegClass;
6875 break;
6876 case LoongArch::PseudoXVMSKNEZ_B:
6877 MskOpc = LoongArch::XVMSKNZ_B;
6878 RC = &LoongArch::LASX256RegClass;
6879 break;
6880 }
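// Build the mask with the selected [x]vmsk* instruction, inverting it with
// [x]vnor.v for the EQZ variants. For 256-bit sources the mask bits live in
// the low word of each 128-bit half, so both halves are read with
// xvpickve2gr.wu and merged into a single GPR with bstrins.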
6881
6882 Register Msk = MRI.createVirtualRegister(RC);
6883 if (NotOpc) {
6884 Register Tmp = MRI.createVirtualRegister(RC);
6885 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6886 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6887 .addReg(Tmp, RegState::Kill)
6888 .addReg(Tmp, RegState::Kill);
6889 } else {
6890 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6891 }
6892
6893 if (TRI->getRegSizeInBits(*RC) > 128) {
6894 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6895 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6896 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6897 .addReg(Msk)
6898 .addImm(0);
6899 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6900 .addReg(Msk, RegState::Kill)
6901 .addImm(4);
6902 BuildMI(*BB, MI, DL,
6903 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6904 : LoongArch::BSTRINS_W),
6905 Dst)
6908 .addImm(256 / EleBits - 1)
6909 .addImm(128 / EleBits);
6910 } else {
6911 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6912 .addReg(Msk, RegState::Kill)
6913 .addImm(0);
6914 }
6915
6916 MI.eraseFromParent();
6917 return BB;
6918}
6919
6920static MachineBasicBlock *
6922 const LoongArchSubtarget &Subtarget) {
6923 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6924 "Unexpected instruction");
6925
6926 MachineFunction &MF = *BB->getParent();
6927 DebugLoc DL = MI.getDebugLoc();
6929 Register LoReg = MI.getOperand(0).getReg();
6930 Register HiReg = MI.getOperand(1).getReg();
6931 Register SrcReg = MI.getOperand(2).getReg();
6932
6933 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6934 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6935 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6936 MI.eraseFromParent(); // The pseudo instruction is gone now.
6937 return BB;
6938}
6939
6940static MachineBasicBlock *
6942 const LoongArchSubtarget &Subtarget) {
6943 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6944 "Unexpected instruction");
6945
6946 MachineFunction &MF = *BB->getParent();
6947 DebugLoc DL = MI.getDebugLoc();
6950 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6951 Register DstReg = MI.getOperand(0).getReg();
6952 Register LoReg = MI.getOperand(1).getReg();
6953 Register HiReg = MI.getOperand(2).getReg();
6954
6955 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6956 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6957 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6958 .addReg(TmpReg, RegState::Kill)
6959 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6960 MI.eraseFromParent(); // The pseudo instruction is gone now.
6961 return BB;
6962}
6963
6965 switch (MI.getOpcode()) {
6966 default:
6967 return false;
6968 case LoongArch::Select_GPR_Using_CC_GPR:
6969 return true;
6970 }
6971}
6972
6973static MachineBasicBlock *
6975 const LoongArchSubtarget &Subtarget) {
6976 // To "insert" Select_* instructions, we actually have to insert the triangle
6977 // control-flow pattern. The incoming instructions know the destination vreg
6978 // to set, the condition code register to branch on, the true/false values to
6979 // select between, and the condcode to use to select the appropriate branch.
6980 //
6981 // We produce the following control flow:
6982 // HeadMBB
6983 // | \
6984 // | IfFalseMBB
6985 // | /
6986 // TailMBB
6987 //
6988 // When we find a sequence of selects we attempt to optimize their emission
6989 // by sharing the control flow. Currently we only handle cases where we have
6990 // multiple selects with the exact same condition (same LHS, RHS and CC).
6991 // The selects may be interleaved with other instructions if the other
6992 // instructions meet some requirements we deem safe:
6993 // - They are debug instructions (these are simply skipped); otherwise:
6994 // - They are not pseudo instructions.
6995 // - They do not have side-effects, do not access memory and their inputs do
6996 //   not depend on the results of the select pseudo-instructions.
6997 // The TrueV/FalseV operands of the selects cannot depend on the result of
6998 // previous selects in the sequence.
6999 // These conditions could be further relaxed. See the X86 target for a
7000 // related approach and more information.
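// Illustrative example (not taken from an existing test): for
//   %res = Select_GPR_Using_CC_GPR %lhs, %rhs, BLT, %tv, %fv
// HeadMBB ends with `blt %lhs, %rhs, TailMBB`, IfFalseMBB is empty and falls
// through, and TailMBB starts with
//   %res = PHI [%tv, HeadMBB], [%fv, IfFalseMBB]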
7001
7002 Register LHS = MI.getOperand(1).getReg();
7003 Register RHS;
7004 if (MI.getOperand(2).isReg())
7005 RHS = MI.getOperand(2).getReg();
7006 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7007
7008 SmallVector<MachineInstr *, 4> SelectDebugValues;
7009 SmallSet<Register, 4> SelectDests;
7010 SelectDests.insert(MI.getOperand(0).getReg());
7011
7012 MachineInstr *LastSelectPseudo = &MI;
7013 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7014 SequenceMBBI != E; ++SequenceMBBI) {
7015 if (SequenceMBBI->isDebugInstr())
7016 continue;
7017 if (isSelectPseudo(*SequenceMBBI)) {
7018 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7019 !SequenceMBBI->getOperand(2).isReg() ||
7020 SequenceMBBI->getOperand(2).getReg() != RHS ||
7021 SequenceMBBI->getOperand(3).getImm() != CC ||
7022 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7023 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7024 break;
7025 LastSelectPseudo = &*SequenceMBBI;
7026 SequenceMBBI->collectDebugValues(SelectDebugValues);
7027 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7028 continue;
7029 }
7030 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7031 SequenceMBBI->mayLoadOrStore() ||
7032 SequenceMBBI->usesCustomInsertionHook())
7033 break;
7034 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7035 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7036 }))
7037 break;
7038 }
7039
7040 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7041 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7042 DebugLoc DL = MI.getDebugLoc();
7044
7045 MachineBasicBlock *HeadMBB = BB;
7046 MachineFunction *F = BB->getParent();
7047 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7048 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7049
7050 F->insert(I, IfFalseMBB);
7051 F->insert(I, TailMBB);
7052
7053 // Set the call frame size on entry to the new basic blocks.
7054 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7055 IfFalseMBB->setCallFrameSize(CallFrameSize);
7056 TailMBB->setCallFrameSize(CallFrameSize);
7057
7058 // Transfer debug instructions associated with the selects to TailMBB.
7059 for (MachineInstr *DebugInstr : SelectDebugValues) {
7060 TailMBB->push_back(DebugInstr->removeFromParent());
7061 }
7062
7063 // Move all instructions after the sequence to TailMBB.
7064 TailMBB->splice(TailMBB->end(), HeadMBB,
7065 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7066 // Update machine-CFG edges by transferring all successors of the current
7067 // block to the new block which will contain the Phi nodes for the selects.
7068 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7069 // Set the successors for HeadMBB.
7070 HeadMBB->addSuccessor(IfFalseMBB);
7071 HeadMBB->addSuccessor(TailMBB);
7072
7073 // Insert appropriate branch.
7074 if (MI.getOperand(2).isImm())
7075 BuildMI(HeadMBB, DL, TII.get(CC))
7076 .addReg(LHS)
7077 .addImm(MI.getOperand(2).getImm())
7078 .addMBB(TailMBB);
7079 else
7080 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7081
7082 // IfFalseMBB just falls through to TailMBB.
7083 IfFalseMBB->addSuccessor(TailMBB);
7084
7085 // Create PHIs for all of the select pseudo-instructions.
7086 auto SelectMBBI = MI.getIterator();
7087 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7088 auto InsertionPoint = TailMBB->begin();
7089 while (SelectMBBI != SelectEnd) {
7090 auto Next = std::next(SelectMBBI);
7091 if (isSelectPseudo(*SelectMBBI)) {
7092 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7093 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7094 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7095 .addReg(SelectMBBI->getOperand(4).getReg())
7096 .addMBB(HeadMBB)
7097 .addReg(SelectMBBI->getOperand(5).getReg())
7098 .addMBB(IfFalseMBB);
7099 SelectMBBI->eraseFromParent();
7100 }
7101 SelectMBBI = Next;
7102 }
7103
7104 F->getProperties().resetNoPHIs();
7105 return TailMBB;
7106}
7107
7108MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7109 MachineInstr &MI, MachineBasicBlock *BB) const {
7110 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7111 DebugLoc DL = MI.getDebugLoc();
7112
7113 switch (MI.getOpcode()) {
7114 default:
7115 llvm_unreachable("Unexpected instr type to insert");
7116 case LoongArch::DIV_W:
7117 case LoongArch::DIV_WU:
7118 case LoongArch::MOD_W:
7119 case LoongArch::MOD_WU:
7120 case LoongArch::DIV_D:
7121 case LoongArch::DIV_DU:
7122 case LoongArch::MOD_D:
7123 case LoongArch::MOD_DU:
7124 return insertDivByZeroTrap(MI, BB);
7125 break;
7126 case LoongArch::WRFCSR: {
7127 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7128 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7129 .addReg(MI.getOperand(1).getReg());
7130 MI.eraseFromParent();
7131 return BB;
7132 }
7133 case LoongArch::RDFCSR: {
7134 MachineInstr *ReadFCSR =
7135 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7136 MI.getOperand(0).getReg())
7137 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7138 ReadFCSR->getOperand(1).setIsUndef();
7139 MI.eraseFromParent();
7140 return BB;
7141 }
7142 case LoongArch::Select_GPR_Using_CC_GPR:
7143 return emitSelectPseudo(MI, BB, Subtarget);
7144 case LoongArch::BuildPairF64Pseudo:
7145 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7146 case LoongArch::SplitPairF64Pseudo:
7147 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7148 case LoongArch::PseudoVBZ:
7149 case LoongArch::PseudoVBZ_B:
7150 case LoongArch::PseudoVBZ_H:
7151 case LoongArch::PseudoVBZ_W:
7152 case LoongArch::PseudoVBZ_D:
7153 case LoongArch::PseudoVBNZ:
7154 case LoongArch::PseudoVBNZ_B:
7155 case LoongArch::PseudoVBNZ_H:
7156 case LoongArch::PseudoVBNZ_W:
7157 case LoongArch::PseudoVBNZ_D:
7158 case LoongArch::PseudoXVBZ:
7159 case LoongArch::PseudoXVBZ_B:
7160 case LoongArch::PseudoXVBZ_H:
7161 case LoongArch::PseudoXVBZ_W:
7162 case LoongArch::PseudoXVBZ_D:
7163 case LoongArch::PseudoXVBNZ:
7164 case LoongArch::PseudoXVBNZ_B:
7165 case LoongArch::PseudoXVBNZ_H:
7166 case LoongArch::PseudoXVBNZ_W:
7167 case LoongArch::PseudoXVBNZ_D:
7168 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7169 case LoongArch::PseudoXVINSGR2VR_B:
7170 case LoongArch::PseudoXVINSGR2VR_H:
7171 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7172 case LoongArch::PseudoCTPOP:
7173 return emitPseudoCTPOP(MI, BB, Subtarget);
7174 case LoongArch::PseudoVMSKLTZ_B:
7175 case LoongArch::PseudoVMSKLTZ_H:
7176 case LoongArch::PseudoVMSKLTZ_W:
7177 case LoongArch::PseudoVMSKLTZ_D:
7178 case LoongArch::PseudoVMSKGEZ_B:
7179 case LoongArch::PseudoVMSKEQZ_B:
7180 case LoongArch::PseudoVMSKNEZ_B:
7181 case LoongArch::PseudoXVMSKLTZ_B:
7182 case LoongArch::PseudoXVMSKLTZ_H:
7183 case LoongArch::PseudoXVMSKLTZ_W:
7184 case LoongArch::PseudoXVMSKLTZ_D:
7185 case LoongArch::PseudoXVMSKGEZ_B:
7186 case LoongArch::PseudoXVMSKEQZ_B:
7187 case LoongArch::PseudoXVMSKNEZ_B:
7188 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7189 case TargetOpcode::STATEPOINT:
7190 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
7191 // while the bl call instruction (to which the statepoint is lowered in the
7192 // end) has an implicit def. This def is early-clobber as it is set at the
7193 // moment of the call, before any use is read.
7194 // Add this implicit dead def here as a workaround.
7195 MI.addOperand(*MI.getMF(),
7197 LoongArch::R1, /*isDef*/ true,
7198 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7199 /*isUndef*/ false, /*isEarlyClobber*/ true));
7200 if (!Subtarget.is64Bit())
7201 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7202 return emitPatchPoint(MI, BB);
7203 }
7204}
7205
7207 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7208 unsigned *Fast) const {
7209 if (!Subtarget.hasUAL())
7210 return false;
7211
7212 // TODO: set reasonable speed number.
7213 if (Fast)
7214 *Fast = 1;
7215 return true;
7216}
7217
7218const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7219 switch ((LoongArchISD::NodeType)Opcode) {
7221 break;
7222
7223#define NODE_NAME_CASE(node) \
7224 case LoongArchISD::node: \
7225 return "LoongArchISD::" #node;
7226
7227 // TODO: Add more target-dependent nodes later.
7228 NODE_NAME_CASE(CALL)
7229 NODE_NAME_CASE(CALL_MEDIUM)
7230 NODE_NAME_CASE(CALL_LARGE)
7231 NODE_NAME_CASE(RET)
7232 NODE_NAME_CASE(TAIL)
7233 NODE_NAME_CASE(TAIL_MEDIUM)
7234 NODE_NAME_CASE(TAIL_LARGE)
7235 NODE_NAME_CASE(SELECT_CC)
7236 NODE_NAME_CASE(BR_CC)
7237 NODE_NAME_CASE(BRCOND)
7238 NODE_NAME_CASE(SLL_W)
7239 NODE_NAME_CASE(SRA_W)
7240 NODE_NAME_CASE(SRL_W)
7241 NODE_NAME_CASE(BSTRINS)
7242 NODE_NAME_CASE(BSTRPICK)
7243 NODE_NAME_CASE(MOVGR2FR_W)
7244 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7245 NODE_NAME_CASE(MOVGR2FR_D)
7246 NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
7247 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7248 NODE_NAME_CASE(FTINT)
7249 NODE_NAME_CASE(BUILD_PAIR_F64)
7250 NODE_NAME_CASE(SPLIT_PAIR_F64)
7251 NODE_NAME_CASE(REVB_2H)
7252 NODE_NAME_CASE(REVB_2W)
7253 NODE_NAME_CASE(BITREV_4B)
7254 NODE_NAME_CASE(BITREV_8B)
7255 NODE_NAME_CASE(BITREV_W)
7256 NODE_NAME_CASE(ROTR_W)
7257 NODE_NAME_CASE(ROTL_W)
7258 NODE_NAME_CASE(DIV_W)
7259 NODE_NAME_CASE(DIV_WU)
7260 NODE_NAME_CASE(MOD_W)
7261 NODE_NAME_CASE(MOD_WU)
7262 NODE_NAME_CASE(CLZ_W)
7263 NODE_NAME_CASE(CTZ_W)
7264 NODE_NAME_CASE(DBAR)
7265 NODE_NAME_CASE(IBAR)
7266 NODE_NAME_CASE(BREAK)
7267 NODE_NAME_CASE(SYSCALL)
7268 NODE_NAME_CASE(CRC_W_B_W)
7269 NODE_NAME_CASE(CRC_W_H_W)
7270 NODE_NAME_CASE(CRC_W_W_W)
7271 NODE_NAME_CASE(CRC_W_D_W)
7272 NODE_NAME_CASE(CRCC_W_B_W)
7273 NODE_NAME_CASE(CRCC_W_H_W)
7274 NODE_NAME_CASE(CRCC_W_W_W)
7275 NODE_NAME_CASE(CRCC_W_D_W)
7276 NODE_NAME_CASE(CSRRD)
7277 NODE_NAME_CASE(CSRWR)
7278 NODE_NAME_CASE(CSRXCHG)
7279 NODE_NAME_CASE(IOCSRRD_B)
7280 NODE_NAME_CASE(IOCSRRD_H)
7281 NODE_NAME_CASE(IOCSRRD_W)
7282 NODE_NAME_CASE(IOCSRRD_D)
7283 NODE_NAME_CASE(IOCSRWR_B)
7284 NODE_NAME_CASE(IOCSRWR_H)
7285 NODE_NAME_CASE(IOCSRWR_W)
7286 NODE_NAME_CASE(IOCSRWR_D)
7287 NODE_NAME_CASE(CPUCFG)
7288 NODE_NAME_CASE(MOVGR2FCSR)
7289 NODE_NAME_CASE(MOVFCSR2GR)
7290 NODE_NAME_CASE(CACOP_D)
7291 NODE_NAME_CASE(CACOP_W)
7292 NODE_NAME_CASE(VSHUF)
7293 NODE_NAME_CASE(VPICKEV)
7294 NODE_NAME_CASE(VPICKOD)
7295 NODE_NAME_CASE(VPACKEV)
7296 NODE_NAME_CASE(VPACKOD)
7297 NODE_NAME_CASE(VILVL)
7298 NODE_NAME_CASE(VILVH)
7299 NODE_NAME_CASE(VSHUF4I)
7300 NODE_NAME_CASE(VREPLVEI)
7301 NODE_NAME_CASE(VREPLGR2VR)
7302 NODE_NAME_CASE(XVPERMI)
7303 NODE_NAME_CASE(XVPERM)
7304 NODE_NAME_CASE(XVREPLVE0)
7305 NODE_NAME_CASE(XVREPLVE0Q)
7306 NODE_NAME_CASE(VPICK_SEXT_ELT)
7307 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7308 NODE_NAME_CASE(VREPLVE)
7309 NODE_NAME_CASE(VALL_ZERO)
7310 NODE_NAME_CASE(VANY_ZERO)
7311 NODE_NAME_CASE(VALL_NONZERO)
7312 NODE_NAME_CASE(VANY_NONZERO)
7313 NODE_NAME_CASE(FRECIPE)
7314 NODE_NAME_CASE(FRSQRTE)
7315 NODE_NAME_CASE(VSLLI)
7316 NODE_NAME_CASE(VSRLI)
7317 NODE_NAME_CASE(VBSLL)
7318 NODE_NAME_CASE(VBSRL)
7319 NODE_NAME_CASE(VLDREPL)
7320 NODE_NAME_CASE(VMSKLTZ)
7321 NODE_NAME_CASE(VMSKGEZ)
7322 NODE_NAME_CASE(VMSKEQZ)
7323 NODE_NAME_CASE(VMSKNEZ)
7324 NODE_NAME_CASE(XVMSKLTZ)
7325 NODE_NAME_CASE(XVMSKGEZ)
7326 NODE_NAME_CASE(XVMSKEQZ)
7327 NODE_NAME_CASE(XVMSKNEZ)
7328 NODE_NAME_CASE(VHADDW)
7329 }
7330#undef NODE_NAME_CASE
7331 return nullptr;
7332}
7333
7334//===----------------------------------------------------------------------===//
7335// Calling Convention Implementation
7336//===----------------------------------------------------------------------===//
7337
7338 // Eight general-purpose registers a0-a7 are used for passing integer
7339 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
7340 // to pass fixed-point arguments, and floating-point arguments when no FPR is
7341 // available or with the soft-float ABI.
7342const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7343 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7344 LoongArch::R10, LoongArch::R11};
7345 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7346 // arguments, and fa0-fa1 are also used to return values.
7347const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7348 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7349 LoongArch::F6, LoongArch::F7};
7350// FPR32 and FPR64 alias each other.
7352 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7353 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7354
7355const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7356 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7357 LoongArch::VR6, LoongArch::VR7};
7358
7359const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7360 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7361 LoongArch::XR6, LoongArch::XR7};
7362
7363// Pass a 2*GRLen argument that has been split into two GRLen values through
7364// registers or the stack as necessary.
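// For example (illustrative), an i64 argument on LA32 is split into two i32
// halves: if only $a7 is still free, the first half is passed in $a7 and the
// second half on the stack; if no GPR is free, both halves go on the stack,
// the first slot aligned to the larger of GRLen/8 and the original
// argument's alignment.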
7365static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7366 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7367 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7368 ISD::ArgFlagsTy ArgFlags2) {
7369 unsigned GRLenInBytes = GRLen / 8;
7370 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7371 // At least one half can be passed via register.
7372 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7373 VA1.getLocVT(), CCValAssign::Full));
7374 } else {
7375 // Both halves must be passed on the stack, with proper alignment.
7376 Align StackAlign =
7377 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7378 State.addLoc(
7380 State.AllocateStack(GRLenInBytes, StackAlign),
7381 VA1.getLocVT(), CCValAssign::Full));
7382 State.addLoc(CCValAssign::getMem(
7383 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7384 LocVT2, CCValAssign::Full));
7385 return false;
7386 }
7387 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7388 // The second half can also be passed via register.
7389 State.addLoc(
7390 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7391 } else {
7392 // The second half is passed via the stack, without additional alignment.
7393 State.addLoc(CCValAssign::getMem(
7394 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7395 LocVT2, CCValAssign::Full));
7396 }
7397 return false;
7398}
7399
7400// Implements the LoongArch calling convention. Returns true upon failure.
7402 unsigned ValNo, MVT ValVT,
7403 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7404 CCState &State, bool IsRet, Type *OrigTy) {
7405 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7406 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7407 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7408 MVT LocVT = ValVT;
7409
7410 // Any return value split into more than two values can't be returned
7411 // directly.
7412 if (IsRet && ValNo > 1)
7413 return true;
7414
7415 // Use GPRs for floating-point values if passing a variadic argument or if no FPR is available.
7416 bool UseGPRForFloat = true;
7417
7418 switch (ABI) {
7419 default:
7420 llvm_unreachable("Unexpected ABI");
7421 break;
7426 UseGPRForFloat = ArgFlags.isVarArg();
7427 break;
7430 break;
7431 }
7432
7433 // If this is a variadic argument, the LoongArch calling convention requires
7434 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7435 // byte alignment. An aligned register should be used regardless of whether
7436 // the original argument was split during legalisation or not. The argument
7437 // will not be passed by registers if the original type is larger than
7438 // 2*GRLen, so the register alignment rule does not apply.
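// For example (illustrative): on LA32, in a variadic call such as
// printf("%lld", 1LL) with the format string passed in $a0, $a1 is skipped so
// that the i64 vararg lands in the aligned register pair ($a2, $a3).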
7439 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7440 if (ArgFlags.isVarArg() &&
7441 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7442 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7443 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7444 // Skip 'odd' register if necessary.
7445 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7446 State.AllocateReg(ArgGPRs);
7447 }
7448
7449 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7450 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7451 State.getPendingArgFlags();
7452
7453 assert(PendingLocs.size() == PendingArgFlags.size() &&
7454 "PendingLocs and PendingArgFlags out of sync");
7455
7456 // FPR32 and FPR64 alias each other.
7457 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7458 UseGPRForFloat = true;
7459
7460 if (UseGPRForFloat && ValVT == MVT::f32) {
7461 LocVT = GRLenVT;
7462 LocInfo = CCValAssign::BCvt;
7463 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7464 LocVT = MVT::i64;
7465 LocInfo = CCValAssign::BCvt;
7466 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7467 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7468 // registers are exhausted.
7469 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7470 // Depending on available argument GPRs, f64 may be passed in a pair of
7471 // GPRs, split between a GPR and the stack, or passed completely on the
7472 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7473 // cases.
7474 MCRegister Reg = State.AllocateReg(ArgGPRs);
7475 if (!Reg) {
7476 int64_t StackOffset = State.AllocateStack(8, Align(8));
7477 State.addLoc(
7478 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7479 return false;
7480 }
7481 LocVT = MVT::i32;
7482 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7483 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7484 if (HiReg) {
7485 State.addLoc(
7486 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7487 } else {
7488 int64_t StackOffset = State.AllocateStack(4, Align(4));
7489 State.addLoc(
7490 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7491 }
7492 return false;
7493 }
7494
7495 // Split arguments might be passed indirectly, so keep track of the pending
7496 // values.
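// For instance (illustrative), an i128 on LA64 is split into two i64 parts
// and, per the handling below, is still passed directly in a pair of GPRs or
// on the stack, while an integer wider than 2*GRLen produces more than two
// parts and is passed indirectly through a single pointer.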
7497 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7498 LocVT = GRLenVT;
7499 LocInfo = CCValAssign::Indirect;
7500 PendingLocs.push_back(
7501 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7502 PendingArgFlags.push_back(ArgFlags);
7503 if (!ArgFlags.isSplitEnd()) {
7504 return false;
7505 }
7506 }
7507
7508 // If the split argument only had two elements, it should be passed directly
7509 // in registers or on the stack.
7510 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7511 PendingLocs.size() <= 2) {
7512 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7513 // Apply the normal calling convention rules to the first half of the
7514 // split argument.
7515 CCValAssign VA = PendingLocs[0];
7516 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7517 PendingLocs.clear();
7518 PendingArgFlags.clear();
7519 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7520 ArgFlags);
7521 }
7522
7523 // Allocate to a register if possible, or else a stack slot.
7524 Register Reg;
7525 unsigned StoreSizeBytes = GRLen / 8;
7526 Align StackAlign = Align(GRLen / 8);
7527
7528 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7529 Reg = State.AllocateReg(ArgFPR32s);
7530 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7531 Reg = State.AllocateReg(ArgFPR64s);
7532 } else if (ValVT.is128BitVector()) {
7533 Reg = State.AllocateReg(ArgVRs);
7534 UseGPRForFloat = false;
7535 StoreSizeBytes = 16;
7536 StackAlign = Align(16);
7537 } else if (ValVT.is256BitVector()) {
7538 Reg = State.AllocateReg(ArgXRs);
7539 UseGPRForFloat = false;
7540 StoreSizeBytes = 32;
7541 StackAlign = Align(32);
7542 } else {
7543 Reg = State.AllocateReg(ArgGPRs);
7544 }
7545
7546 unsigned StackOffset =
7547 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7548
7549 // If we reach this point and PendingLocs is non-empty, we must be at the
7550 // end of a split argument that must be passed indirectly.
7551 if (!PendingLocs.empty()) {
7552 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7553 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7554 for (auto &It : PendingLocs) {
7555 if (Reg)
7556 It.convertToReg(Reg);
7557 else
7558 It.convertToMem(StackOffset);
7559 State.addLoc(It);
7560 }
7561 PendingLocs.clear();
7562 PendingArgFlags.clear();
7563 return false;
7564 }
7565 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7566 "Expected an GRLenVT at this stage");
7567
7568 if (Reg) {
7569 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7570 return false;
7571 }
7572
7573 // When a floating-point value is passed on the stack, no bit-cast is needed.
7574 if (ValVT.isFloatingPoint()) {
7575 LocVT = ValVT;
7576 LocInfo = CCValAssign::Full;
7577 }
7578
7579 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7580 return false;
7581}
7582
7583void LoongArchTargetLowering::analyzeInputArgs(
7584 MachineFunction &MF, CCState &CCInfo,
7585 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7586 LoongArchCCAssignFn Fn) const {
7587 FunctionType *FType = MF.getFunction().getFunctionType();
7588 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7589 MVT ArgVT = Ins[i].VT;
7590 Type *ArgTy = nullptr;
7591 if (IsRet)
7592 ArgTy = FType->getReturnType();
7593 else if (Ins[i].isOrigArg())
7594 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7596 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7597 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7598 CCInfo, IsRet, ArgTy)) {
7599 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7600 << '\n');
7601 llvm_unreachable("");
7602 }
7603 }
7604}
7605
7606void LoongArchTargetLowering::analyzeOutputArgs(
7607 MachineFunction &MF, CCState &CCInfo,
7608 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7609 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7610 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7611 MVT ArgVT = Outs[i].VT;
7612 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7614 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7615 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7616 CCInfo, IsRet, OrigTy)) {
7617 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7618 << "\n");
7619 llvm_unreachable("");
7620 }
7621 }
7622}
7623
7624// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7625// values.
7627 const CCValAssign &VA, const SDLoc &DL) {
7628 switch (VA.getLocInfo()) {
7629 default:
7630 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7631 case CCValAssign::Full:
7633 break;
7634 case CCValAssign::BCvt:
7635 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7636 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7637 else
7638 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7639 break;
7640 }
7641 return Val;
7642}
7643
7645 const CCValAssign &VA, const SDLoc &DL,
7646 const ISD::InputArg &In,
7647 const LoongArchTargetLowering &TLI) {
7650 EVT LocVT = VA.getLocVT();
7651 SDValue Val;
7652 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7653 Register VReg = RegInfo.createVirtualRegister(RC);
7654 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7655 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7656
7657 // If input is sign extended from 32 bits, note it for the OptW pass.
7658 if (In.isOrigArg()) {
7659 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7660 if (OrigArg->getType()->isIntegerTy()) {
7661 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7662 // An input zero extended from i31 can also be considered sign extended.
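// (zero extension from fewer than 32 bits leaves bit 31 clear, so the value
// equals its own 32-bit sign extension)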
7663 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7664 (BitWidth < 32 && In.Flags.isZExt())) {
7667 LAFI->addSExt32Register(VReg);
7668 }
7669 }
7670 }
7671
7672 return convertLocVTToValVT(DAG, Val, VA, DL);
7673}
7674
7675// The caller is responsible for loading the full value if the argument is
7676// passed with CCValAssign::Indirect.
7678 const CCValAssign &VA, const SDLoc &DL) {
7680 MachineFrameInfo &MFI = MF.getFrameInfo();
7681 EVT ValVT = VA.getValVT();
7682 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7683 /*IsImmutable=*/true);
7684 SDValue FIN = DAG.getFrameIndex(
7686
7687 ISD::LoadExtType ExtType;
7688 switch (VA.getLocInfo()) {
7689 default:
7690 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7691 case CCValAssign::Full:
7693 case CCValAssign::BCvt:
7694 ExtType = ISD::NON_EXTLOAD;
7695 break;
7696 }
7697 return DAG.getExtLoad(
7698 ExtType, DL, VA.getLocVT(), Chain, FIN,
7700}
7701
7703 const CCValAssign &VA,
7704 const CCValAssign &HiVA,
7705 const SDLoc &DL) {
7706 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7707 "Unexpected VA");
7709 MachineFrameInfo &MFI = MF.getFrameInfo();
7711
7712 assert(VA.isRegLoc() && "Expected register VA assignment");
7713
7714 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7715 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7716 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7717 SDValue Hi;
7718 if (HiVA.isMemLoc()) {
7719 // Second half of f64 is passed on the stack.
7720 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7721 /*IsImmutable=*/true);
7722 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7723 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7725 } else {
7726 // Second half of f64 is passed in another GPR.
7727 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7728 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7729 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7730 }
7731 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7732}
7733
7735 const CCValAssign &VA, const SDLoc &DL) {
7736 EVT LocVT = VA.getLocVT();
7737
7738 switch (VA.getLocInfo()) {
7739 default:
7740 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7741 case CCValAssign::Full:
7742 break;
7743 case CCValAssign::BCvt:
7744 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7745 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7746 else
7747 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7748 break;
7749 }
7750 return Val;
7751}
7752
7753static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7754 CCValAssign::LocInfo LocInfo,
7755 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7756 CCState &State) {
7757 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7758 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7759 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7760 static const MCPhysReg GPRList[] = {
7761 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7762 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7763 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7764 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7765 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7766 return false;
7767 }
7768 }
7769
7770 if (LocVT == MVT::f32) {
7771 // Pass in STG registers: F1, F2, F3, F4
7772 // fs0,fs1,fs2,fs3
7773 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7774 LoongArch::F26, LoongArch::F27};
7775 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7776 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7777 return false;
7778 }
7779 }
7780
7781 if (LocVT == MVT::f64) {
7782 // Pass in STG registers: D1, D2, D3, D4
7783 // fs4,fs5,fs6,fs7
7784 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7785 LoongArch::F30_64, LoongArch::F31_64};
7786 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7787 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7788 return false;
7789 }
7790 }
7791
7792 report_fatal_error("No registers left in GHC calling convention");
7793 return true;
7794}
7795
7796// Transform physical registers into virtual registers.
7798 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7799 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7800 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7801
7803
7804 switch (CallConv) {
7805 default:
7806 llvm_unreachable("Unsupported calling convention");
7807 case CallingConv::C:
7808 case CallingConv::Fast:
7810 break;
7811 case CallingConv::GHC:
7812 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7813 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7815 "GHC calling convention requires the F and D extensions");
7816 }
7817
7818 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7819 MVT GRLenVT = Subtarget.getGRLenVT();
7820 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7821 // Used with varargs to accumulate store chains.
7822 std::vector<SDValue> OutChains;
7823
7824 // Assign locations to all of the incoming arguments.
7826 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7827
7828 if (CallConv == CallingConv::GHC)
7830 else
7831 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7832
7833 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7834 CCValAssign &VA = ArgLocs[i];
7835 SDValue ArgValue;
7836 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7837 // case.
7838 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7839 assert(VA.needsCustom());
7840 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7841 } else if (VA.isRegLoc())
7842 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7843 else
7844 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7845 if (VA.getLocInfo() == CCValAssign::Indirect) {
7846 // If the original argument was split and passed by reference, we need to
7847 // load all parts of it here (using the same address).
7848 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7850 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7851 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7852 assert(ArgPartOffset == 0);
7853 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7854 CCValAssign &PartVA = ArgLocs[i + 1];
7855 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7856 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7857 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7858 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
7860 ++i;
7861 ++InsIdx;
7862 }
7863 continue;
7864 }
7865 InVals.push_back(ArgValue);
7866 }
7867
7868 if (IsVarArg) {
7870 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7871 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7872 MachineFrameInfo &MFI = MF.getFrameInfo();
7873 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7874 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7875
7876 // Offset of the first variable argument from stack pointer, and size of
7877 // the vararg save area. For now, the varargs save area is either zero or
7878 // large enough to hold a0-a7.
7879 int VaArgOffset, VarArgsSaveSize;
7880
7881 // If all registers are allocated, then all varargs must be passed on the
7882 // stack and we don't need to save any argregs.
7883 if (ArgRegs.size() == Idx) {
7884 VaArgOffset = CCInfo.getStackSize();
7885 VarArgsSaveSize = 0;
7886 } else {
7887 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7888 VaArgOffset = -VarArgsSaveSize;
7889 }
7890
7891 // Record the frame index of the first variable argument
7892 // which is a value necessary to VASTART.
7893 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7894 LoongArchFI->setVarArgsFrameIndex(FI);
7895
7896 // If saving an odd number of registers then create an extra stack slot to
7897 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7898 // offsets to even-numbered registers remain 2*GRLen-aligned.
7899 if (Idx % 2) {
7900 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7901 true);
7902 VarArgsSaveSize += GRLenInBytes;
7903 }
7904
7905 // Copy the integer registers that may have been used for passing varargs
7906 // to the vararg save area.
7907 for (unsigned I = Idx; I < ArgRegs.size();
7908 ++I, VaArgOffset += GRLenInBytes) {
7909 const Register Reg = RegInfo.createVirtualRegister(RC);
7910 RegInfo.addLiveIn(ArgRegs[I], Reg);
7911 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7912 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7913 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7914 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7916 cast<StoreSDNode>(Store.getNode())
7917 ->getMemOperand()
7918 ->setValue((Value *)nullptr);
7919 OutChains.push_back(Store);
7920 }
7921 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7922 }
7923
7924 // All stores are grouped in one node to allow the matching between
7925 // the size of Ins and InVals. This only happens for vararg functions.
7926 if (!OutChains.empty()) {
7927 OutChains.push_back(Chain);
7928 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7929 }
7930
7931 return Chain;
7932}
7933
7935 return CI->isTailCall();
7936}
7937
7938 // Check if the return value is only used as a return value, as otherwise
7939 // we can't perform a tail call.
7941 SDValue &Chain) const {
7942 if (N->getNumValues() != 1)
7943 return false;
7944 if (!N->hasNUsesOfValue(1, 0))
7945 return false;
7946
7947 SDNode *Copy = *N->user_begin();
7948 if (Copy->getOpcode() != ISD::CopyToReg)
7949 return false;
7950
7951 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7952 // isn't safe to perform a tail call.
7953 if (Copy->getGluedNode())
7954 return false;
7955
7956 // The copy must be used by a LoongArchISD::RET, and nothing else.
7957 bool HasRet = false;
7958 for (SDNode *Node : Copy->users()) {
7959 if (Node->getOpcode() != LoongArchISD::RET)
7960 return false;
7961 HasRet = true;
7962 }
7963
7964 if (!HasRet)
7965 return false;
7966
7967 Chain = Copy->getOperand(0);
7968 return true;
7969}
7970
7971// Check whether the call is eligible for tail call optimization.
7972bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7973 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7974 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7975
7976 auto CalleeCC = CLI.CallConv;
7977 auto &Outs = CLI.Outs;
7978 auto &Caller = MF.getFunction();
7979 auto CallerCC = Caller.getCallingConv();
7980
7981 // Do not tail call opt if the stack is used to pass parameters.
7982 if (CCInfo.getStackSize() != 0)
7983 return false;
7984
7985 // Do not tail call opt if any parameters need to be passed indirectly.
7986 for (auto &VA : ArgLocs)
7987 if (VA.getLocInfo() == CCValAssign::Indirect)
7988 return false;
7989
7990 // Do not tail call opt if either caller or callee uses struct return
7991 // semantics.
7992 auto IsCallerStructRet = Caller.hasStructRetAttr();
7993 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7994 if (IsCallerStructRet || IsCalleeStructRet)
7995 return false;
7996
7997 // Do not tail call opt if either the callee or caller has a byval argument.
7998 for (auto &Arg : Outs)
7999 if (Arg.Flags.isByVal())
8000 return false;
8001
8002 // The callee has to preserve all registers the caller needs to preserve.
8003 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8004 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8005 if (CalleeCC != CallerCC) {
8006 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8007 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8008 return false;
8009 }
8010 return true;
8011}
8012
8014 return DAG.getDataLayout().getPrefTypeAlign(
8015 VT.getTypeForEVT(*DAG.getContext()));
8016}
8017
8018// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8019// and output parameter nodes.
8020SDValue
8022 SmallVectorImpl<SDValue> &InVals) const {
8023 SelectionDAG &DAG = CLI.DAG;
8024 SDLoc &DL = CLI.DL;
8026 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8028 SDValue Chain = CLI.Chain;
8029 SDValue Callee = CLI.Callee;
8030 CallingConv::ID CallConv = CLI.CallConv;
8031 bool IsVarArg = CLI.IsVarArg;
8032 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8033 MVT GRLenVT = Subtarget.getGRLenVT();
8034 bool &IsTailCall = CLI.IsTailCall;
8035
8037
8038 // Analyze the operands of the call, assigning locations to each operand.
8040 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8041
8042 if (CallConv == CallingConv::GHC)
8043 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8044 else
8045 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8046
8047 // Check if it's really possible to do a tail call.
8048 if (IsTailCall)
8049 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8050
8051 if (IsTailCall)
8052 ++NumTailCalls;
8053 else if (CLI.CB && CLI.CB->isMustTailCall())
8054 report_fatal_error("failed to perform tail call elimination on a call "
8055 "site marked musttail");
8056
8057 // Get a count of how many bytes are to be pushed on the stack.
8058 unsigned NumBytes = ArgCCInfo.getStackSize();
8059
8060 // Create local copies for byval args.
8061 SmallVector<SDValue> ByValArgs;
8062 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8063 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8064 if (!Flags.isByVal())
8065 continue;
8066
8067 SDValue Arg = OutVals[i];
8068 unsigned Size = Flags.getByValSize();
8069 Align Alignment = Flags.getNonZeroByValAlign();
8070
8071 int FI =
8072 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8073 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8074 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8075
8076 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8077 /*IsVolatile=*/false,
8078 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8080 ByValArgs.push_back(FIPtr);
8081 }
8082
8083 if (!IsTailCall)
8084 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8085
8086 // Copy argument values to their designated locations.
8088 SmallVector<SDValue> MemOpChains;
8089 SDValue StackPtr;
8090 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8091 ++i, ++OutIdx) {
8092 CCValAssign &VA = ArgLocs[i];
8093 SDValue ArgValue = OutVals[OutIdx];
8094 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8095
8096 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8097 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8098 assert(VA.isRegLoc() && "Expected register VA assignment");
8099 assert(VA.needsCustom());
8100 SDValue SplitF64 =
8101 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8102 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8103 SDValue Lo = SplitF64.getValue(0);
8104 SDValue Hi = SplitF64.getValue(1);
8105
8106 Register RegLo = VA.getLocReg();
8107 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8108
8109 // Get the CCValAssign for the Hi part.
8110 CCValAssign &HiVA = ArgLocs[++i];
8111
8112 if (HiVA.isMemLoc()) {
8113 // Second half of f64 is passed on the stack.
8114 if (!StackPtr.getNode())
8115 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8116 SDValue Address =
8117 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8118 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8119 // Emit the store.
8120 MemOpChains.push_back(DAG.getStore(
8121 Chain, DL, Hi, Address,
8122 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
8123 } else {
8124 // Second half of f64 is passed in another GPR.
8125 Register RegHigh = HiVA.getLocReg();
8126 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8127 }
8128 continue;
8129 }
8130
8131 // Promote the value if needed.
8132 // For now, only handle fully promoted and indirect arguments.
8133 if (VA.getLocInfo() == CCValAssign::Indirect) {
8134 // Store the argument in a stack slot and pass its address.
8135 Align StackAlign =
8136 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8137 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8138 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8139 // If the original argument was split and passed by reference, we need to
8140 // store the required parts of it here (and pass just one address).
8141 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8142 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8143 assert(ArgPartOffset == 0);
8144 // Calculate the total size to store. We don't have access to what we're
8145 // actually storing other than performing the loop and collecting the
8146 // info.
8147 SmallVector<std::pair<SDValue, SDValue>> Parts;
8148 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8149 SDValue PartValue = OutVals[OutIdx + 1];
8150 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8151 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8152 EVT PartVT = PartValue.getValueType();
8153
8154 StoredSize += PartVT.getStoreSize();
8155 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8156 Parts.push_back(std::make_pair(PartValue, Offset));
8157 ++i;
8158 ++OutIdx;
8159 }
8160 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8161 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8162 MemOpChains.push_back(
8163 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8164 MachinePointerInfo::getFixedStack(MF, FI)));
8165 for (const auto &Part : Parts) {
8166 SDValue PartValue = Part.first;
8167 SDValue PartOffset = Part.second;
8168 SDValue Address =
8169 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8170 MemOpChains.push_back(
8171 DAG.getStore(Chain, DL, PartValue, Address,
8172 MachinePointerInfo::getFixedStack(MF, FI)));
8173 }
8174 ArgValue = SpillSlot;
8175 } else {
8176 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8177 }
8178
8179 // Use local copy if it is a byval arg.
8180 if (Flags.isByVal())
8181 ArgValue = ByValArgs[j++];
8182
8183 if (VA.isRegLoc()) {
8184 // Queue up the argument copies and emit them at the end.
8185 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8186 } else {
8187 assert(VA.isMemLoc() && "Argument not register or memory");
8188 assert(!IsTailCall && "Tail call not allowed if stack is used "
8189 "for passing parameters");
8190
8191 // Work out the address of the stack slot.
8192 if (!StackPtr.getNode())
8193 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8194 SDValue Address =
8195 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8196 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
8197
8198 // Emit the store.
8199 MemOpChains.push_back(
8200 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8201 }
8202 }
8203
8204 // Join the stores, which are independent of one another.
8205 if (!MemOpChains.empty())
8206 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8207
8208 SDValue Glue;
8209
8210 // Build a sequence of copy-to-reg nodes, chained and glued together.
8211 for (auto &Reg : RegsToPass) {
8212 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8213 Glue = Chain.getValue(1);
8214 }
8215
8216 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8217 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8218 // split it and then direct call can be matched by PseudoCALL.
8219 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
8220 const GlobalValue *GV = S->getGlobal();
8221 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8222 ? LoongArchII::MO_CALL
8223 : LoongArchII::MO_CALL_PLT;
8224 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8225 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8226 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8227 ? LoongArchII::MO_CALL
8228 : LoongArchII::MO_CALL_PLT;
8229 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8230 }
8231
8232 // The first call operand is the chain and the second is the target address.
8233 SmallVector<SDValue, 8> Ops;
8234 Ops.push_back(Chain);
8235 Ops.push_back(Callee);
8236
8237 // Add argument registers to the end of the list so that they are
8238 // known live into the call.
8239 for (auto &Reg : RegsToPass)
8240 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8241
8242 if (!IsTailCall) {
8243 // Add a register mask operand representing the call-preserved registers.
8244 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8245 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8246 assert(Mask && "Missing call preserved mask for calling convention");
8247 Ops.push_back(DAG.getRegisterMask(Mask));
8248 }
8249
8250 // Glue the call to the argument copies, if any.
8251 if (Glue.getNode())
8252 Ops.push_back(Glue);
8253
8254 // Emit the call.
8255 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8256 unsigned Op;
8257 switch (DAG.getTarget().getCodeModel()) {
8258 default:
8259 report_fatal_error("Unsupported code model");
8260 case CodeModel::Small:
8261 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8262 break;
8263 case CodeModel::Medium:
8264 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8265 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8266 break;
8267 case CodeModel::Large:
8268 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8269 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8270 break;
8271 }
8272
8273 if (IsTailCall) {
8274 MF.getFrameInfo().setHasTailCall();
8275 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8276 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8277 return Ret;
8278 }
8279
8280 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8281 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8282 Glue = Chain.getValue(1);
8283
8284 // Mark the end of the call, which is glued to the call itself.
8285 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8286 Glue = Chain.getValue(1);
8287
8288 // Assign locations to each value returned by this call.
8289 SmallVector<CCValAssign> RVLocs;
8290 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8291 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8292
8293 // Copy all of the result registers out of their specified physreg.
8294 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8295 auto &VA = RVLocs[i];
8296 // Copy the value out.
8297 SDValue RetValue =
8298 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8299 // Glue the RetValue to the end of the call sequence.
8300 Chain = RetValue.getValue(1);
8301 Glue = RetValue.getValue(2);
8302
8303 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8304 assert(VA.needsCustom());
8305 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8306 MVT::i32, Glue);
8307 Chain = RetValue2.getValue(1);
8308 Glue = RetValue2.getValue(2);
8309 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8310 RetValue, RetValue2);
8311 } else
8312 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8313
8314 InVals.push_back(RetValue);
8315 }
8316
8317 return Chain;
8318}
8319
8320bool LoongArchTargetLowering::CanLowerReturn(
8321 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8322 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8323 const Type *RetTy) const {
8324 SmallVector<CCValAssign, 16> RVLocs;
8325 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8326
8327 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8328 LoongArchABI::ABI ABI =
8329 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8330 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8331 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8332 return false;
8333 }
8334 return true;
8335}
8336
8337SDValue LoongArchTargetLowering::LowerReturn(
8338 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8339 const SmallVectorImpl<ISD::OutputArg> &Outs,
8340 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8341 SelectionDAG &DAG) const {
8342 // Stores the assignment of the return value to a location.
8343 SmallVector<CCValAssign, 16> RVLocs;
8344
8345 // Info about the registers and stack slot.
8346 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8347 *DAG.getContext());
8348
8349 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8350 nullptr, CC_LoongArch);
8351 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8352 report_fatal_error("GHC functions return void only");
8353 SDValue Glue;
8354 SmallVector<SDValue, 4> RetOps(1, Chain);
8355
8356 // Copy the result values into the output registers.
8357 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8358 SDValue Val = OutVals[OutIdx];
8359 CCValAssign &VA = RVLocs[i];
8360 assert(VA.isRegLoc() && "Can only return in registers!");
8361
8362 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8363 // Handle returning f64 on LA32D with a soft float ABI.
8364 assert(VA.isRegLoc() && "Expected return via registers");
8365 assert(VA.needsCustom());
8366 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8367 DAG.getVTList(MVT::i32, MVT::i32), Val);
8368 SDValue Lo = SplitF64.getValue(0);
8369 SDValue Hi = SplitF64.getValue(1);
8370 Register RegLo = VA.getLocReg();
8371 Register RegHi = RVLocs[++i].getLocReg();
8372
8373 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8374 Glue = Chain.getValue(1);
8375 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8376 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8377 Glue = Chain.getValue(1);
8378 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8379 } else {
8380 // Handle a 'normal' return.
8381 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8382 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8383
8384 // Guarantee that all emitted copies are stuck together.
8385 Glue = Chain.getValue(1);
8386 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8387 }
8388 }
8389
8390 RetOps[0] = Chain; // Update chain.
8391
8392 // Add the glue node if we have it.
8393 if (Glue.getNode())
8394 RetOps.push_back(Glue);
8395
8396 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8397}
8398
8399bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
8400 EVT VT) const {
8401 if (!Subtarget.hasExtLSX())
8402 return false;
8403
8404 if (VT == MVT::f32) {
8405 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8406 return (masked == 0x3e000000 || masked == 0x40000000);
8407 }
8408
8409 if (VT == MVT::f64) {
8410 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8411 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8412 }
8413
8414 return false;
8415}
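// Note (illustrative, not from the source): the masked comparisons above
// accept a small family of immediates that vldi/xvldi can materialize
// directly. For MVT::f32, e.g., 0.5f (0x3F000000) and 2.0f (0x40000000) pass
// the check, since ANDing with 0x7e07ffff yields 0x3e000000 and 0x40000000
// respectively, while a value such as 1.3f (0x3FA66666) does not.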
8416
8417bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8418 bool ForCodeSize) const {
8419 // TODO: Maybe need more checks here after vector extension is supported.
8420 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8421 return false;
8422 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8423 return false;
8424 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8425}
8426
8428 return true;
8429}
8430
8432 return true;
8433}
8434
8435bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8436 const Instruction *I) const {
8437 if (!Subtarget.is64Bit())
8438 return isa<LoadInst>(I) || isa<StoreInst>(I);
8439
8440 if (isa<LoadInst>(I))
8441 return true;
8442
8443 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8444 // require fences because we can use amswap_db.[w/d].
8445 Type *Ty = I->getOperand(0)->getType();
8446 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8447 unsigned Size = Ty->getIntegerBitWidth();
8448 return (Size == 8 || Size == 16);
8449 }
8450
8451 return false;
8452}
8453
8454EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
8455 LLVMContext &Context,
8456 EVT VT) const {
8457 if (!VT.isVector())
8458 return getPointerTy(DL);
8459 return VT.changeVectorElementTypeToInteger();
8460}
8461
8463 // TODO: Support vectors.
8464 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
8465}
8466
8467bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
8468 const CallInst &I,
8469 MachineFunction &MF,
8470 unsigned Intrinsic) const {
8471 switch (Intrinsic) {
8472 default:
8473 return false;
8474 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8475 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8476 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8477 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8478 Info.opc = ISD::INTRINSIC_W_CHAIN;
8479 Info.memVT = MVT::i32;
8480 Info.ptrVal = I.getArgOperand(0);
8481 Info.offset = 0;
8482 Info.align = Align(4);
8485 return true;
8486 // TODO: Add more Intrinsics later.
8487 }
8488}
8489
8490// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
8491// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
8492// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
8493// regression, we need to implement it manually.
8496
8498 Op == AtomicRMWInst::And) &&
8499 "Unable to expand");
8500 unsigned MinWordSize = 4;
8501
8502 IRBuilder<> Builder(AI);
8503 LLVMContext &Ctx = Builder.getContext();
8504 const DataLayout &DL = AI->getDataLayout();
8505 Type *ValueType = AI->getType();
8506 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8507
8508 Value *Addr = AI->getPointerOperand();
8509 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8510 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8511
8512 Value *AlignedAddr = Builder.CreateIntrinsic(
8513 Intrinsic::ptrmask, {PtrTy, IntTy},
8514 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8515 "AlignedAddr");
8516
8517 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8518 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8519 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8520 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8521 Value *Mask = Builder.CreateShl(
8522 ConstantInt::get(WordType,
8523 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8524 ShiftAmt, "Mask");
8525 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8526 Value *ValOperand_Shifted =
8527 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8528 ShiftAmt, "ValOperand_Shifted");
8529 Value *NewOperand;
8530 if (Op == AtomicRMWInst::And)
8531 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8532 else
8533 NewOperand = ValOperand_Shifted;
8534
8535 AtomicRMWInst *NewAI =
8536 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8537 AI->getOrdering(), AI->getSyncScopeID());
8538
8539 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8540 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8541 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8542 AI->replaceAllUsesWith(FinalOldResult);
8543 AI->eraseFromParent();
8544}
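// Worked example (illustrative): for `atomicrmw and ptr %p, i8 %v` where %p
// sits at byte offset 1 of its aligned 32-bit word, the code above computes
// ShiftAmt = 8, Mask = 0xFF00 and Inv_Mask = 0xFFFF00FF, then issues a
// word-sized `atomicrmw and` with ((zext i8 %v to i32) << 8) | 0xFFFF00FF on
// the aligned address, so only the selected byte is modified; the old byte is
// recovered by shifting the returned word right by 8 and truncating.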
8545
8548 // TODO: Add more AtomicRMWInst that need to be extended.
8549
8550 // Since floating-point operations require a non-trivial set of data
8551 // operations, use CmpXChg to expand.
8552 if (AI->isFloatingPointOperation() ||
8558
8559 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8562 AI->getOperation() == AtomicRMWInst::Sub)) {
8564 }
8565
8566 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8567 if (Subtarget.hasLAMCAS()) {
8568 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8572 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8574 }
8575
8576 if (Size == 8 || Size == 16)
8577 return AtomicExpansionKind::MaskedIntrinsic;
8578 return AtomicExpansionKind::None;
8579}
8580
8581static Intrinsic::ID
8583 AtomicRMWInst::BinOp BinOp) {
8584 if (GRLen == 64) {
8585 switch (BinOp) {
8586 default:
8587 llvm_unreachable("Unexpected AtomicRMW BinOp");
8589 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8590 case AtomicRMWInst::Add:
8591 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8592 case AtomicRMWInst::Sub:
8593 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8595 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8597 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8599 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8600 case AtomicRMWInst::Max:
8601 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8602 case AtomicRMWInst::Min:
8603 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8604 // TODO: support other AtomicRMWInst.
8605 }
8606 }
8607
8608 if (GRLen == 32) {
8609 switch (BinOp) {
8610 default:
8611 llvm_unreachable("Unexpected AtomicRMW BinOp");
8613 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8614 case AtomicRMWInst::Add:
8615 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8616 case AtomicRMWInst::Sub:
8617 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8619 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8621 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8623 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8624 case AtomicRMWInst::Max:
8625 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8626 case AtomicRMWInst::Min:
8627 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8628 // TODO: support other AtomicRMWInst.
8629 }
8630 }
8631
8632 llvm_unreachable("Unexpected GRLen\n");
8633}
8634
8637 AtomicCmpXchgInst *CI) const {
8638
8639 if (Subtarget.hasLAMCAS())
8641
8643 if (Size == 8 || Size == 16)
8646}
8647
8649 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8650 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8651 unsigned GRLen = Subtarget.getGRLen();
8652 AtomicOrdering FailOrd = CI->getFailureOrdering();
8653 Value *FailureOrdering =
8654 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8655 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8656 if (GRLen == 64) {
8657 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8658 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8659 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8660 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8661 }
8662 Type *Tys[] = {AlignedAddr->getType()};
8663 Value *Result = Builder.CreateIntrinsic(
8664 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8665 if (GRLen == 64)
8666 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8667 return Result;
8668}
8669
8671 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8672 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8673 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8674 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8675 // mask, as this produces better code than the LL/SC loop emitted by
8676 // int_loongarch_masked_atomicrmw_xchg.
8677 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8680 if (CVal->isZero())
8681 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8682 Builder.CreateNot(Mask, "Inv_Mask"),
8683 AI->getAlign(), Ord);
8684 if (CVal->isMinusOne())
8685 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8686 AI->getAlign(), Ord);
8687 }
8688
8689 unsigned GRLen = Subtarget.getGRLen();
8690 Value *Ordering =
8691 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8692 Type *Tys[] = {AlignedAddr->getType()};
8694 AI->getModule(),
8696
8697 if (GRLen == 64) {
8698 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8699 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8700 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8701 }
8702
8703 Value *Result;
8704
8705 // Must pass the shift amount needed to sign extend the loaded value prior
8706 // to performing a signed comparison for min/max. ShiftAmt is the number of
8707 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8708 // is the number of bits to left+right shift the value in order to
8709 // sign-extend.
8710 if (AI->getOperation() == AtomicRMWInst::Min ||
8712 const DataLayout &DL = AI->getDataLayout();
8713 unsigned ValWidth =
8714 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8715 Value *SextShamt =
8716 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8717 Result = Builder.CreateCall(LlwOpScwLoop,
8718 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8719 } else {
8720 Result =
8721 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8722 }
8723
8724 if (GRLen == 64)
8725 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8726 return Result;
8727}
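// Worked example (illustrative): for a masked `atomicrmw min` on an i16 field
// located at byte offset 2 of its word with GRLen == 64, ShiftAmt is 16 and
// ValWidth is 16, so SextShamt = 64 - 16 - 16 = 32; the LL/SC pseudo can then
// shift the loaded word left and arithmetically right by 32 to sign-extend
// the field before performing the signed comparison.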
8728
8730 const MachineFunction &MF, EVT VT) const {
8731 VT = VT.getScalarType();
8732
8733 if (!VT.isSimple())
8734 return false;
8735
8736 switch (VT.getSimpleVT().SimpleTy) {
8737 case MVT::f32:
8738 case MVT::f64:
8739 return true;
8740 default:
8741 break;
8742 }
8743
8744 return false;
8745}
8746
8748 const Constant *PersonalityFn) const {
8749 return LoongArch::R4;
8750}
8751
8753 const Constant *PersonalityFn) const {
8754 return LoongArch::R5;
8755}
8756
8757//===----------------------------------------------------------------------===//
8758// Target Optimization Hooks
8759//===----------------------------------------------------------------------===//
8760
8762 const LoongArchSubtarget &Subtarget) {
8763 // The FRECIPE family of instructions has a relative accuracy of 2^-14.
8764 // IEEE float has 23 fraction bits and double has 52; each Newton-Raphson refinement step roughly doubles the accurate bits, so one step suffices for f32 and two for f64.
8765 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8766 return RefinementSteps;
8767}
8768
8770 SelectionDAG &DAG, int Enabled,
8771 int &RefinementSteps,
8772 bool &UseOneConstNR,
8773 bool Reciprocal) const {
8774 if (Subtarget.hasFrecipe()) {
8775 SDLoc DL(Operand);
8776 EVT VT = Operand.getValueType();
8777
8778 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8779 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8780 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8781 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8782 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8783
8784 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8785 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8786
8787 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8788 if (Reciprocal)
8789 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8790
8791 return Estimate;
8792 }
8793 }
8794
8795 return SDValue();
8796}
8797
8799 SelectionDAG &DAG,
8800 int Enabled,
8801 int &RefinementSteps) const {
8802 if (Subtarget.hasFrecipe()) {
8803 SDLoc DL(Operand);
8804 EVT VT = Operand.getValueType();
8805
8806 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8807 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8808 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8809 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8810 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8811
8812 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8813 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8814
8815 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8816 }
8817 }
8818
8819 return SDValue();
8820}
8821
8822//===----------------------------------------------------------------------===//
8823// LoongArch Inline Assembly Support
8824//===----------------------------------------------------------------------===//
8825
8827LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8828 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8829 //
8830 // 'f': A floating-point register (if available).
8831 // 'k': A memory operand whose address is formed by a base register and
8832 // (optionally scaled) index register.
8833 // 'l': A signed 16-bit constant.
8834 // 'm': A memory operand whose address is formed by a base register and
8835 // offset that is suitable for use in instructions with the same
8836 // addressing mode as st.w and ld.w.
8837 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8838 // instruction)
8839 // 'I': A signed 12-bit constant (for arithmetic instructions).
8840 // 'J': Integer zero.
8841 // 'K': An unsigned 12-bit constant (for logic instructions).
8842 // "ZB": An address that is held in a general-purpose register. The offset is
8843 // zero.
8844 // "ZC": A memory operand whose address is formed by a base register and
8845 // offset that is suitable for use in instructions with the same
8846 // addressing mode as ll.w and sc.w.
8847 if (Constraint.size() == 1) {
8848 switch (Constraint[0]) {
8849 default:
8850 break;
8851 case 'f':
8852 case 'q':
8853 return C_RegisterClass;
8854 case 'l':
8855 case 'I':
8856 case 'J':
8857 case 'K':
8858 return C_Immediate;
8859 case 'k':
8860 return C_Memory;
8861 }
8862 }
8863
8864 if (Constraint == "ZC" || Constraint == "ZB")
8865 return C_Memory;
8866
8867 // 'm' is handled here.
8868 return TargetLowering::getConstraintType(Constraint);
8869}
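// Usage sketch (illustrative, not taken from the source): the immediate
// constraints map directly onto LoongArch ALU instructions in C inline asm,
// e.g., given an `int x`:
//
//   int r;
//   asm("addi.w %0, %1, %2" : "=r"(r) : "r"(x), "I"(-42));  // signed 12-bit
//   asm("ori %0, %1, %2" : "=r"(r) : "r"(x), "K"(0xabc));   // unsigned 12-bit
//
// Constants outside the documented ranges are not accepted when the operands
// are lowered (see LowerAsmOperandForConstraint below).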
8870
8871InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8872 StringRef ConstraintCode) const {
8873 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8877 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8878}
8879
8880std::pair<unsigned, const TargetRegisterClass *>
8881LoongArchTargetLowering::getRegForInlineAsmConstraint(
8882 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8883 // First, see if this is a constraint that directly corresponds to a LoongArch
8884 // register class.
8885 if (Constraint.size() == 1) {
8886 switch (Constraint[0]) {
8887 case 'r':
8888 // TODO: Support fixed vectors up to GRLen?
8889 if (VT.isVector())
8890 break;
8891 return std::make_pair(0U, &LoongArch::GPRRegClass);
8892 case 'q':
8893 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8894 case 'f':
8895 if (Subtarget.hasBasicF() && VT == MVT::f32)
8896 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8897 if (Subtarget.hasBasicD() && VT == MVT::f64)
8898 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8899 if (Subtarget.hasExtLSX() &&
8900 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8901 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8902 if (Subtarget.hasExtLASX() &&
8903 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8904 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8905 break;
8906 default:
8907 break;
8908 }
8909 }
8910
8911 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8912 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8913 // constraints while the official register name is prefixed with a '$'. So we
8914 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
8915 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
8916 // case insensitive, so no need to convert the constraint to upper case here.
8917 //
8918 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8919 // decode the usage of register name aliases into their official names. And
8920 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8921 // official register names.
8922 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8923 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8924 bool IsFP = Constraint[2] == 'f';
8925 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8926 std::pair<unsigned, const TargetRegisterClass *> R;
8928 TRI, join_items("", Temp.first, Temp.second), VT);
8929 // Match those names to the widest floating point register type available.
8930 if (IsFP) {
8931 unsigned RegNo = R.first;
8932 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8933 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8934 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8935 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8936 }
8937 }
8938 }
8939 return R;
8940 }
8941
8942 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8943}
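// Example (illustrative): an operand constrained as "{$f0}" is first clipped
// to "{f0}" by the code above; if the D extension is available and the
// operand type is f64 (or MVT::Other), the matched F0 is then widened to
// F0_64 so that the FPR64 register class is used.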
8944
8945void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8946 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8947 SelectionDAG &DAG) const {
8948 // Currently only support length 1 constraints.
8949 if (Constraint.size() == 1) {
8950 switch (Constraint[0]) {
8951 case 'l':
8952 // Validate & create a 16-bit signed immediate operand.
8953 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8954 uint64_t CVal = C->getSExtValue();
8955 if (isInt<16>(CVal))
8956 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8957 Subtarget.getGRLenVT()));
8958 }
8959 return;
8960 case 'I':
8961 // Validate & create a 12-bit signed immediate operand.
8962 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8963 uint64_t CVal = C->getSExtValue();
8964 if (isInt<12>(CVal))
8965 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8966 Subtarget.getGRLenVT()));
8967 }
8968 return;
8969 case 'J':
8970 // Validate & create an integer zero operand.
8971 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8972 if (C->getZExtValue() == 0)
8973 Ops.push_back(
8974 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8975 return;
8976 case 'K':
8977 // Validate & create a 12-bit unsigned immediate operand.
8978 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8979 uint64_t CVal = C->getZExtValue();
8980 if (isUInt<12>(CVal))
8981 Ops.push_back(
8982 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8983 }
8984 return;
8985 default:
8986 break;
8987 }
8988 }
8989 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8990}
8991
8992#define GET_REGISTER_MATCHER
8993#include "LoongArchGenAsmMatcher.inc"
8994
8997 const MachineFunction &MF) const {
8998 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8999 std::string NewRegName = Name.second.str();
9000 Register Reg = MatchRegisterAltName(NewRegName);
9001 if (!Reg)
9002 Reg = MatchRegisterName(NewRegName);
9003 if (!Reg)
9004 return Reg;
9005 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9006 if (!ReservedRegs.test(Reg))
9007 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9008 StringRef(RegName) + "\"."));
9009 return Reg;
9010}
9011
9012bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
9013 EVT VT, SDValue C) const {
9014 // TODO: Support vectors.
9015 if (!VT.isScalarInteger())
9016 return false;
9017
9018 // Omit the optimization if the data size exceeds GRLen.
9019 if (VT.getSizeInBits() > Subtarget.getGRLen())
9020 return false;
9021
9022 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9023 const APInt &Imm = ConstNode->getAPIntValue();
9024 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9025 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9026 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9027 return true;
9028 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9029 if (ConstNode->hasOneUse() &&
9030 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9031 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9032 return true;
9033 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9034 // in which the immediate has two set bits. Or break (MUL x, imm)
9035 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9036 // equals (1 << s0) - (1 << s1).
9037 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9038 unsigned Shifts = Imm.countr_zero();
9039 // Reject immediates which can be composed via a single LUI.
9040 if (Shifts >= 12)
9041 return false;
9042 // Reject multiplications that can be optimized to
9043 // (SLLI (ALSL x, x, 1/2/3/4), s).
9044 APInt ImmPop = Imm.ashr(Shifts);
9045 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9046 return false;
9047 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9048 // since it needs one more instruction than the other three cases.
9049 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9050 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9051 (ImmSmall - Imm).isPowerOf2())
9052 return true;
9053 }
9054 }
9055
9056 return false;
9057}
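// Worked examples (illustrative):
//   x * 17   is accepted: 17 - 1 = 16 is a power of 2, i.e. (x << 4) + x.
//   x * 4160 is accepted: with CTZ = 6, 4160 - 64 = 4096 is a power of 2,
//            i.e. (x << 12) + (x << 6).
//   x * 9216 is rejected: 9216 == 9 << 10, which is better handled as
//            (SLLI (ALSL x, x, 3), 10).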
9058
9059bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
9060 const AddrMode &AM,
9061 Type *Ty, unsigned AS,
9062 Instruction *I) const {
9063 // LoongArch has four basic addressing modes:
9064 // 1. reg
9065 // 2. reg + 12-bit signed offset
9066 // 3. reg + 14-bit signed offset left-shifted by 2
9067 // 4. reg1 + reg2
9068 // TODO: Add more checks after the vector extension is supported.
9069
9070 // No global is ever allowed as a base.
9071 if (AM.BaseGV)
9072 return false;
9073
9074 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9075 // with `UAL` feature.
9076 if (!isInt<12>(AM.BaseOffs) &&
9077 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9078 return false;
9079
9080 switch (AM.Scale) {
9081 case 0:
9082 // "r+i" or just "i", depending on HasBaseReg.
9083 break;
9084 case 1:
9085 // "r+r+i" is not allowed.
9086 if (AM.HasBaseReg && AM.BaseOffs)
9087 return false;
9088 // Otherwise we have "r+r" or "r+i".
9089 break;
9090 case 2:
9091 // "2*r+r" or "2*r+i" is not allowed.
9092 if (AM.HasBaseReg || AM.BaseOffs)
9093 return false;
9094 // Allow "2*r" as "r+r".
9095 break;
9096 default:
9097 return false;
9098 }
9099
9100 return true;
9101}
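// Examples (illustrative): `reg + 2040` is accepted (simm12); `reg + 8000` is
// accepted only with the UAL feature, since 8000 == 2000 << 2 fits the
// shifted simm14 form but not simm12; `reg1 + reg2` is accepted with a zero
// offset, while `reg1 + reg2 + 8` and `2*reg + 4` fall into the Scale checks
// above and are rejected.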
9102
9104 return isInt<12>(Imm);
9105}
9106
9108 return isInt<12>(Imm);
9109}
9110
9111bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
9112 // Zexts are free if they can be combined with a load.
9113 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9114 // poorly with type legalization of compares preferring sext.
9115 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9116 EVT MemVT = LD->getMemoryVT();
9117 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9118 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9119 LD->getExtensionType() == ISD::ZEXTLOAD))
9120 return true;
9121 }
9122
9123 return TargetLowering::isZExtFree(Val, VT2);
9124}
9125
9126bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
9127 EVT DstVT) const {
9128 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9129}
9130
9132 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9133}
9134
9136 // TODO: Support vectors.
9137 if (Y.getValueType().isVector())
9138 return false;
9139
9140 return !isa<ConstantSDNode>(Y);
9141}
9142
9143ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
9144 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9145 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9146}
9147
9148bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
9149 Type *Ty, bool IsSigned) const {
9150 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9151 return true;
9152
9153 return IsSigned;
9154}
9155
9156bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
9157 // Return false to suppress the unnecessary extensions if the LibCall
9158 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9159 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9160 Type.getSizeInBits() < Subtarget.getGRLen()))
9161 return false;
9162 return true;
9163}
9164
9165// memcpy and other memory intrinsics typically try to use wider loads/stores
9166// if the source/dest is aligned and the copy size is large enough. We therefore
9167// want to align such objects passed to memory intrinsics.
9169 unsigned &MinSize,
9170 Align &PrefAlign) const {
9171 if (!isa<MemIntrinsic>(CI))
9172 return false;
9173
9174 if (Subtarget.is64Bit()) {
9175 MinSize = 8;
9176 PrefAlign = Align(8);
9177 } else {
9178 MinSize = 4;
9179 PrefAlign = Align(4);
9180 }
9181
9182 return true;
9183}
9184
9193
9194bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9195 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9196 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9197 bool IsABIRegCopy = CC.has_value();
9198 EVT ValueVT = Val.getValueType();
9199
9200 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9201 PartVT == MVT::f32) {
9202 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9203 // nan, and cast to f32.
9204 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9205 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9206 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9207 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9208 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9209 Parts[0] = Val;
9210 return true;
9211 }
9212
9213 return false;
9214}
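// Bit-level example (illustrative): an f16 value of 1.0 has bit pattern
// 0x3C00; after the OR with 0xFFFF0000 the f32 carrier holds 0xFFFF3C00,
// whose exponent field is all ones and whose mantissa is non-zero, i.e. a NaN
// that keeps the original half-precision bits in its low 16 bits.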
9215
9216SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9217 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9218 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9219 bool IsABIRegCopy = CC.has_value();
9220
9221 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9222 PartVT == MVT::f32) {
9223 SDValue Val = Parts[0];
9224
9225 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9226 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9227 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9228 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9229 return Val;
9230 }
9231
9232 return SDValue();
9233}
9234
9235MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9236 CallingConv::ID CC,
9237 EVT VT) const {
9238 // Use f32 to pass f16.
9239 if (VT == MVT::f16 && Subtarget.hasBasicF())
9240 return MVT::f32;
9241
9243}
9244
9245unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9246 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9247 // Use f32 to pass f16.
9248 if (VT == MVT::f16 && Subtarget.hasBasicF())
9249 return 1;
9250
9252}
9253
9255 SDValue Op, const APInt &OriginalDemandedBits,
9256 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9257 unsigned Depth) const {
9258 EVT VT = Op.getValueType();
9259 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9260 unsigned Opc = Op.getOpcode();
9261 switch (Opc) {
9262 default:
9263 break;
9266 SDValue Src = Op.getOperand(0);
9267 MVT SrcVT = Src.getSimpleValueType();
9268 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9269 unsigned NumElts = SrcVT.getVectorNumElements();
9270
9271 // If we don't need the sign bits at all just return zero.
9272 if (OriginalDemandedBits.countr_zero() >= NumElts)
9273 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9274
9275 // Only demand the vector elements of the sign bits we need.
9276 APInt KnownUndef, KnownZero;
9277 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9278 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9279 TLO, Depth + 1))
9280 return true;
9281
9282 Known.Zero = KnownZero.zext(BitWidth);
9283 Known.Zero.setHighBits(BitWidth - NumElts);
9284
9285 // [X]VMSKLTZ only uses the MSB from each vector element.
9286 KnownBits KnownSrc;
9287 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9288 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9289 Depth + 1))
9290 return true;
9291
9292 if (KnownSrc.One[SrcBits - 1])
9293 Known.One.setLowBits(NumElts);
9294 else if (KnownSrc.Zero[SrcBits - 1])
9295 Known.Zero.setLowBits(NumElts);
9296
9297 // Attempt to avoid multi-use ops if we don't need anything from it.
9299 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9300 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9301 return false;
9302 }
9303 }
9304
9306 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9307}
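// Illustrative: for a v4i32 source, [X]VMSKLTZ packs the four sign bits into
// the low 4 bits of the result. A user that demands only bit 0 therefore lets
// SimplifyDemandedVectorElts drop all source elements except element 0, and
// only the MSB of each remaining element is demanded from the source.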
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
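For illustration, a minimal sketch of the check this helper describes, assuming the usual convention that -1 marks an undef shuffle lane; this is not a transcription of the file's implementation.
static bool isSequentialOrUndefSketch(ArrayRef<int> Mask, unsigned Pos,
                                      unsigned Size, int Low, int Step = 1) {
  // Every lane in [Pos, Pos + Size) must be undef (-1) or follow the
  // arithmetic sequence Low, Low + Step, Low + 2 * Step, ...
  for (unsigned I = Pos, E = Pos + Size; I != E; ++I, Low += Step)
    if (Mask[I] != -1 && Mask[I] != Low)
      return false;
  return true;
}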
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1432
bool isZero() const
Definition APFloat.h:1445
APInt bitcastToAPInt() const
Definition APFloat.h:1353
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the bit at the position given by "bitPosition" to 1.
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
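A hedged usage sketch of the APInt operations listed above; the 16-bit width and the particular values are purely illustrative.
#include "llvm/ADT/APInt.h"
using llvm::APInt;

APInt Ones = APInt::getAllOnes(16);      // 0xFFFF, every bit set
APInt Low = APInt::getZero(16);
Low.setLowBits(8);                       // 0x00FF
bool Contained = Low.isSubsetOf(Ones);   // true: all set bits of Low are set in Ones
unsigned TZ = Low.countr_zero();         // 0 trailing zero bits
APInt Shifted = Ones.lshr(4);            // logical right shift -> 0x0FFF
APInt Wide = Low.zext(32);               // zero-extend to a 32-bit APInt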
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
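A hedged sketch of how a shouldExpandAtomicRMWInIR-style hook typically inspects an AtomicRMWInst; the 32-bit threshold and the expansion kinds chosen below are illustrative assumptions, not LoongArch's actual policy.
static TargetLowering::AtomicExpansionKind
decideExpansionSketch(AtomicRMWInst *AI) {
  // Floating-point RMW is commonly routed through a cmpxchg loop.
  if (AI->isFloatingPointOperation())
    return TargetLowering::AtomicExpansionKind::CmpXChg;
  // Sub-word integer RMW is commonly handled via masked intrinsics.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits().getFixedValue();
  if (Size < 32)
    return TargetLowering::AtomicExpansionKind::MaskedIntrinsic;
  return TargetLowering::AtomicExpansionKind::None; // assume native support
}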
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:461
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:162
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
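A hedged sketch of the formal-argument analysis loop these CCState/CCValAssign helpers support; CC_Hypothetical stands in for a real CCAssignFn and is not a LoongArch symbol, and CallConv, IsVarArg, MF, Ins and DAG are assumed to be in scope as they are inside LowerFormalArguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Hypothetical); // fills ArgLocs
for (CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc()) {
    // The argument arrives in VA.getLocReg(); copy it out of the physreg.
  } else {
    // The argument lives on the stack at VA.getLocMemOffset(); load it.
  }
}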
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:388
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B. This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
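A hedged sketch of the MVT queries listed above, as typically used when splitting or re-typing vectors; v4i32 is chosen only for illustration.
MVT VT = MVT::v4i32;
unsigned NumElts = VT.getVectorNumElements();       // 4
MVT EltVT = VT.getVectorElementType();              // i32
MVT HalfVT = VT.getHalfNumVectorElementsVT();       // v2i32
MVT IntVT = VT.changeVectorElementTypeToInteger();  // v4i32 (already integer)
bool Is128 = VT.is128BitVector();                   // true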
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
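A hedged sketch of the BuildMI / MachineInstrBuilder pattern that custom-inserter hooks such as emitBuildPairF64Pseudo rely on; the ADDI_D opcode and operand layout shown here are illustrative, not a transcription of this file, and BB, MI, DL, TII, DstReg and SrcReg are assumed to be in scope.
// Insert "DstReg = ADDI_D SrcReg, 1" into *BB, immediately before MI.
BuildMI(*BB, MI, DL, TII->get(LoongArch::ADDI_D), DstReg)
    .addReg(SrcReg)
    .addImm(1);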
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
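A hedged sketch of a trivial combine written against the SelectionDAG builders listed above; it only illustrates the style (getOperand, isNullConstant, returning an empty SDValue for "no change"), not an optimization actually performed in this file.
static SDValue combineAddSketch(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  if (isNullConstant(RHS))
    return LHS;                               // fold (add X, 0) -> X
  if (isAllOnesConstant(RHS))                 // fold (add X, -1) -> (sub X, 1)
    return DAG.getNode(ISD::SUB, DL, VT, LHS, DAG.getConstant(1, DL, VT));
  return SDValue();                           // tell the combiner: no change
}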
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
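A hedged usage sketch of the StringRef helpers listed above, in the style of register-name parsing a getRegisterByName-type hook might do; the "$r4" string is illustrative only.
llvm::StringRef Name = "$r4";
if (Name.starts_with("$r")) {
  auto [Prefix, Num] = Name.split('r'); // Prefix == "$", Num == "4"
  unsigned Digits = Num.size();         // 1
}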
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:130
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
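A hedged sketch of the immediate-range checks these MathExtras helpers express, the kind of tests hooks like isLegalAddImmediate and isLegalICmpImmediate reduce to; the 12-bit width and the sample value are illustrative only.
#include "llvm/Support/MathExtras.h"

int64_t Imm = 2044;
bool FitsSImm12 = llvm::isInt<12>(Imm);            // true: -2048..2047 covers 2044
bool FitsUImm12 = llvm::isUInt<12>(Imm);           // true: 0..4095 covers 2044
bool FitsShifted = llvm::isShiftedInt<10, 2>(Imm); // true: 2044 == 511 << 2
bool IsMask = llvm::isMask_64(Imm);                // false: set bits do not start at bit 0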
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...