1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "loongarch-isel-lowering"
42
43STATISTIC(NumTailCalls, "Number of tail calls");
44
53
55 "loongarch-materialize-float-imm", cl::Hidden,
56 cl::desc("Maximum number of instructions used (including code sequence "
57 "to generate the value and moving the value to FPR) when "
58 "materializing floating-point immediates (default = 3)"),
60 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62 "Materialize FP immediate within 2 instructions"),
64 "Materialize FP immediate within 3 instructions"),
66 "Materialize FP immediate within 4 instructions"),
68 "Materialize FP immediate within 5 instructions"),
70 "Materialize FP immediate within 6 instructions "
71 "(behaves same as 5 on loongarch64)")));
72
73static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
74 cl::desc("Trap on integer division by zero."),
75 cl::init(false));
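// Illustrative usage (assumed; this is just the standard cl::opt mechanism):
// the check can be enabled on the llc command line, e.g.
//   llc -mtriple=loongarch64 -loongarch-check-zero-division foo.ll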
76
78 const LoongArchSubtarget &STI)
79 : TargetLowering(TM), Subtarget(STI) {
80
81 MVT GRLenVT = Subtarget.getGRLenVT();
82
83 // Set up the register classes.
84
85 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
86 if (Subtarget.hasBasicF())
87 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
88 if (Subtarget.hasBasicD())
89 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
90
91 static const MVT::SimpleValueType LSXVTs[] = {
92 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
93 static const MVT::SimpleValueType LASXVTs[] = {
94 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
95
96 if (Subtarget.hasExtLSX())
97 for (MVT VT : LSXVTs)
98 addRegisterClass(VT, &LoongArch::LSX128RegClass);
99
100 if (Subtarget.hasExtLASX())
101 for (MVT VT : LASXVTs)
102 addRegisterClass(VT, &LoongArch::LASX256RegClass);
103
104 // Set operations for LA32 and LA64.
105
107 MVT::i1, Promote);
108
115
118 GRLenVT, Custom);
119
121
122 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
123 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
126
127 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
128 setOperationAction(ISD::TRAP, MVT::Other, Legal);
129
133
134 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
135
136 // BITREV/REVB requires the 32S feature.
137 if (STI.has32S()) {
138 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
139 // we get to know which of sll and revb.2h is faster.
142
143 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
144 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
145 // and i32 could still be byte-swapped relatively cheaply.
147 } else {
155 }
156
157 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
159 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
163
166
167 // Set operations for LA64 only.
168
169 if (Subtarget.is64Bit()) {
176 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
187
191 Custom);
192 setOperationAction(ISD::LROUND, MVT::i32, Custom);
193 }
194
195 // Set operations for LA32 only.
196
197 if (!Subtarget.is64Bit()) {
203 if (Subtarget.hasBasicD())
204 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
205 }
206
207 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
208
209 static const ISD::CondCode FPCCToExpand[] = {
212
213 // Set operations for 'F' feature.
214
215 if (Subtarget.hasBasicF()) {
216 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
217 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
218 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
219 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
220 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
221
224 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
226 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
227 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
228 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
229 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
234 setOperationAction(ISD::FSIN, MVT::f32, Expand);
235 setOperationAction(ISD::FCOS, MVT::f32, Expand);
236 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
237 setOperationAction(ISD::FPOW, MVT::f32, Expand);
239 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
240 Subtarget.isSoftFPABI() ? LibCall : Custom);
241 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
242 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
244 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
245 Subtarget.isSoftFPABI() ? LibCall : Custom);
246
247 if (Subtarget.is64Bit())
248 setOperationAction(ISD::FRINT, MVT::f32, Legal);
249
250 if (!Subtarget.hasBasicD()) {
252 if (Subtarget.is64Bit()) {
255 }
256 }
257 }
258
259 // Set operations for 'D' feature.
260
261 if (Subtarget.hasBasicD()) {
262 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
265 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
268 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
269
272 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
276 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
277 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
278 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
280 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
282 setOperationAction(ISD::FSIN, MVT::f64, Expand);
283 setOperationAction(ISD::FCOS, MVT::f64, Expand);
284 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
285 setOperationAction(ISD::FPOW, MVT::f64, Expand);
287 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
288 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
289 Subtarget.isSoftFPABI() ? LibCall : Custom);
290 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
291 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
292 Subtarget.isSoftFPABI() ? LibCall : Custom);
293
294 if (Subtarget.is64Bit())
295 setOperationAction(ISD::FRINT, MVT::f64, Legal);
296 }
297
298 // Set operations for 'LSX' feature.
299
300 if (Subtarget.hasExtLSX()) {
302 // Expand all truncating stores and extending loads.
303 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
304 setTruncStoreAction(VT, InnerVT, Expand);
307 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
308 }
309 // By default everything must be expanded. Then we will selectively turn
310 // on ones that can be effectively codegen'd.
311 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
313 }
314
315 for (MVT VT : LSXVTs) {
316 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
317 setOperationAction(ISD::BITCAST, VT, Legal);
319
323
328 }
329 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
332 Legal);
334 VT, Legal);
341 Expand);
350 }
351 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
353 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
355 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
358 }
359 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
363 setOperationAction(ISD::FSQRT, VT, Legal);
364 setOperationAction(ISD::FNEG, VT, Legal);
367 VT, Expand);
369 }
371 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
372 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
373 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
374 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
375
376 for (MVT VT :
377 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
378 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
380 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
381 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
382 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
383 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
384 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
385 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
386 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
387 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
388 }
389 }
390
391 // Set operations for 'LASX' feature.
392
393 if (Subtarget.hasExtLASX()) {
394 for (MVT VT : LASXVTs) {
395 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
396 setOperationAction(ISD::BITCAST, VT, Legal);
398
404
408 }
409 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
412 Legal);
414 VT, Legal);
421 Expand);
430 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
431 }
432 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
434 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
436 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
439 }
440 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
444 setOperationAction(ISD::FSQRT, VT, Legal);
445 setOperationAction(ISD::FNEG, VT, Legal);
448 VT, Expand);
450 }
451 }
452
453 // Set DAG combine for LA32 and LA64.
454
459
460 // Set DAG combine for 'LSX' feature.
461
462 if (Subtarget.hasExtLSX()) {
464 setTargetDAGCombine(ISD::BITCAST);
465 }
466
467 // Set DAG combine for 'LASX' feature.
468
469 if (Subtarget.hasExtLASX())
471
472 // Compute derived properties from the register classes.
473 computeRegisterProperties(Subtarget.getRegisterInfo());
474
476
479
480 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
481
483
484 // Function alignments.
486 // Set preferred alignments.
487 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
488 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
489 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
490
491 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
492 if (Subtarget.hasLAMCAS())
494
495 if (Subtarget.hasSCQ()) {
497 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
498 }
499}
500
502 const GlobalAddressSDNode *GA) const {
503 // In order to maximise the opportunity for common subexpression elimination,
504 // keep a separate ADD node for the global address offset instead of folding
505 // it in the global address node. Later peephole optimisations may choose to
506 // fold it back in when profitable.
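  // Illustrative sketch (IR names are hypothetical):
  //   %p = getelementptr i8, ptr @g, i64 8
  //   %q = getelementptr i8, ptr @g, i64 12
  // Keeping (add (globaladdr @g), 8) and (add (globaladdr @g), 12) as separate
  // ADD nodes lets CSE share one materialization of @g's address rather than
  // folding each offset into its own global address node.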
507 return false;
508}
509
511 SelectionDAG &DAG) const {
512 switch (Op.getOpcode()) {
513 case ISD::ATOMIC_FENCE:
514 return lowerATOMIC_FENCE(Op, DAG);
516 return lowerEH_DWARF_CFA(Op, DAG);
518 return lowerGlobalAddress(Op, DAG);
520 return lowerGlobalTLSAddress(Op, DAG);
522 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
524 return lowerINTRINSIC_W_CHAIN(Op, DAG);
526 return lowerINTRINSIC_VOID(Op, DAG);
528 return lowerBlockAddress(Op, DAG);
529 case ISD::JumpTable:
530 return lowerJumpTable(Op, DAG);
531 case ISD::SHL_PARTS:
532 return lowerShiftLeftParts(Op, DAG);
533 case ISD::SRA_PARTS:
534 return lowerShiftRightParts(Op, DAG, true);
535 case ISD::SRL_PARTS:
536 return lowerShiftRightParts(Op, DAG, false);
538 return lowerConstantPool(Op, DAG);
539 case ISD::FP_TO_SINT:
540 return lowerFP_TO_SINT(Op, DAG);
541 case ISD::BITCAST:
542 return lowerBITCAST(Op, DAG);
543 case ISD::UINT_TO_FP:
544 return lowerUINT_TO_FP(Op, DAG);
545 case ISD::SINT_TO_FP:
546 return lowerSINT_TO_FP(Op, DAG);
547 case ISD::VASTART:
548 return lowerVASTART(Op, DAG);
549 case ISD::FRAMEADDR:
550 return lowerFRAMEADDR(Op, DAG);
551 case ISD::RETURNADDR:
552 return lowerRETURNADDR(Op, DAG);
554 return lowerWRITE_REGISTER(Op, DAG);
556 return lowerINSERT_VECTOR_ELT(Op, DAG);
558 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
560 return lowerBUILD_VECTOR(Op, DAG);
562 return lowerCONCAT_VECTORS(Op, DAG);
564 return lowerVECTOR_SHUFFLE(Op, DAG);
565 case ISD::BITREVERSE:
566 return lowerBITREVERSE(Op, DAG);
568 return lowerSCALAR_TO_VECTOR(Op, DAG);
569 case ISD::PREFETCH:
570 return lowerPREFETCH(Op, DAG);
571 case ISD::SELECT:
572 return lowerSELECT(Op, DAG);
573 case ISD::BRCOND:
574 return lowerBRCOND(Op, DAG);
575 case ISD::FP_TO_FP16:
576 return lowerFP_TO_FP16(Op, DAG);
577 case ISD::FP16_TO_FP:
578 return lowerFP16_TO_FP(Op, DAG);
579 case ISD::FP_TO_BF16:
580 return lowerFP_TO_BF16(Op, DAG);
581 case ISD::BF16_TO_FP:
582 return lowerBF16_TO_FP(Op, DAG);
583 case ISD::VECREDUCE_ADD:
584 return lowerVECREDUCE_ADD(Op, DAG);
585 case ISD::VECREDUCE_AND:
586 case ISD::VECREDUCE_OR:
587 case ISD::VECREDUCE_XOR:
588 case ISD::VECREDUCE_SMAX:
589 case ISD::VECREDUCE_SMIN:
590 case ISD::VECREDUCE_UMAX:
591 case ISD::VECREDUCE_UMIN:
592 return lowerVECREDUCE(Op, DAG);
593 case ISD::ConstantFP:
594 return lowerConstantFP(Op, DAG);
595 }
596 return SDValue();
597}
598
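// Illustrative sketch (assumed sequence; the actual instructions come from the
// integer materialization helper): float 1.5f has bit pattern 0x3fc00000, so on
// LA64 it might be built as
//   lu12i.w    $a0, 0x3fc00     # generate the integer value
//   movgr2fr.w $fa0, $a0        # move the value to an FPR
// i.e. two instructions, within the default budget of three.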
599SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
600 SelectionDAG &DAG) const {
601 EVT VT = Op.getValueType();
603 const APFloat &FPVal = CFP->getValueAPF();
604 SDLoc DL(CFP);
605
606 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
607 (VT == MVT::f64 && Subtarget.hasBasicD()));
608
609 // If value is 0.0 or -0.0, just ignore it.
610 if (FPVal.isZero())
611 return SDValue();
612
613 // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
614 if (isFPImmVLDILegal(FPVal, VT))
615 return SDValue();
616
617 // Construct as integer, and move to float register.
618 APInt INTVal = FPVal.bitcastToAPInt();
619
620 // If more than MaterializeFPImmInsNum instructions would be needed to
621 // generate INTVal and move it to a floating-point register, fall back to
622 // a floating-point load from the constant pool.
624 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
625 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
626 return SDValue();
627
628 switch (VT.getSimpleVT().SimpleTy) {
629 default:
630 llvm_unreachable("Unexpected floating point type!");
631 break;
632 case MVT::f32: {
633 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
634 if (Subtarget.is64Bit())
635 NewVal = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, NewVal);
636 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
638 DL, VT, NewVal);
639 }
640 case MVT::f64: {
641 if (Subtarget.is64Bit()) {
642 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
643 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
644 }
645 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
646 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
647 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
648 }
649 }
650
651 return SDValue();
652}
653
654// Lower vecreduce_add using vhaddw instructions.
655// For Example:
656// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
657// can be lowered to:
658// VHADDW_D_W vr0, vr0, vr0
659// VHADDW_Q_D vr0, vr0, vr0
660// VPICKVE2GR_D a0, vr0, 0
661// ADDI_W a0, a0, 0
662SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
663 SelectionDAG &DAG) const {
664
665 SDLoc DL(Op);
666 MVT OpVT = Op.getSimpleValueType();
667 SDValue Val = Op.getOperand(0);
668
669 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
670 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
671 unsigned ResBits = OpVT.getScalarSizeInBits();
672
673 unsigned LegalVecSize = 128;
674 bool isLASX256Vector =
675 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
676
677 // Ensure the operand type is legal, widening it until it is.
678 while (!isTypeLegal(Val.getSimpleValueType())) {
679 Val = DAG.WidenVector(Val, DL);
680 }
681
682 // NumEles controls the iteration count; v4i32 for LSX and v8i32 for
683 // LASX should iterate the same number of times.
684 if (isLASX256Vector) {
685 NumEles /= 2;
686 LegalVecSize = 256;
687 }
688
689 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
690 MVT IntTy = MVT::getIntegerVT(EleBits);
691 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
692 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
693 }
694
695 if (isLASX256Vector) {
696 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
697 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
698 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
699 }
700
701 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
702 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
703 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
704}
705
706// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
707// For Example:
708// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
709// can be lowered to:
710// VBSRL_V vr1, vr0, 8
711// VMAX_W vr0, vr1, vr0
712// VBSRL_V vr1, vr0, 4
713// VMAX_W vr0, vr1, vr0
714// VPICKVE2GR_W a0, vr0, 0
715 // A 256-bit vector is illegal here; by default it is split into two
716 // 128-bit vectors that are then handled by this lowering.
717SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
718 SelectionDAG &DAG) const {
719 SDLoc DL(Op);
720
721 MVT OpVT = Op.getSimpleValueType();
722 SDValue Val = Op.getOperand(0);
723
724 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
725 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
726
727 // Ensure the operand type is legal, widening it until it is.
728 while (!isTypeLegal(Val.getSimpleValueType())) {
729 Val = DAG.WidenVector(Val, DL);
730 }
731
732 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
733 MVT VecTy = Val.getSimpleValueType();
734 MVT GRLenVT = Subtarget.getGRLenVT();
735
736 for (int i = NumEles; i > 1; i /= 2) {
737 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
738 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
739 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
740 }
741
742 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
743 DAG.getConstant(0, DL, GRLenVT));
744}
745
746SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
747 SelectionDAG &DAG) const {
748 unsigned IsData = Op.getConstantOperandVal(4);
749
750 // We don't support non-data prefetch.
751 // Just preserve the chain.
752 if (!IsData)
753 return Op.getOperand(0);
754
755 return Op;
756}
757
758// Return true if Val is equal to (setcc LHS, RHS, CC).
759// Return false if Val is the inverse of (setcc LHS, RHS, CC).
760// Otherwise, return std::nullopt.
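// For illustration (values are hypothetical): with Val = (setcc %a, %b, setlt),
//   matchSetCC(%a, %b, setlt, Val) returns true,
//   matchSetCC(%a, %b, setge, Val) returns false (inverse condition), and
//   matchSetCC(%a, %c, setlt, Val) returns std::nullopt (operands differ).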
761static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
762 ISD::CondCode CC, SDValue Val) {
763 assert(Val->getOpcode() == ISD::SETCC);
764 SDValue LHS2 = Val.getOperand(0);
765 SDValue RHS2 = Val.getOperand(1);
766 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
767
768 if (LHS == LHS2 && RHS == RHS2) {
769 if (CC == CC2)
770 return true;
771 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
772 return false;
773 } else if (LHS == RHS2 && RHS == LHS2) {
775 if (CC == CC2)
776 return true;
777 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
778 return false;
779 }
780
781 return std::nullopt;
782}
783
785 const LoongArchSubtarget &Subtarget) {
786 SDValue CondV = N->getOperand(0);
787 SDValue TrueV = N->getOperand(1);
788 SDValue FalseV = N->getOperand(2);
789 MVT VT = N->getSimpleValueType(0);
790 SDLoc DL(N);
791
792 // (select c, -1, y) -> -c | y
793 if (isAllOnesConstant(TrueV)) {
794 SDValue Neg = DAG.getNegative(CondV, DL, VT);
795 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
796 }
797 // (select c, y, -1) -> (c-1) | y
798 if (isAllOnesConstant(FalseV)) {
799 SDValue Neg =
800 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
801 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
802 }
803
804 // (select c, 0, y) -> (c-1) & y
805 if (isNullConstant(TrueV)) {
806 SDValue Neg =
807 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
808 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
809 }
810 // (select c, y, 0) -> -c & y
811 if (isNullConstant(FalseV)) {
812 SDValue Neg = DAG.getNegative(CondV, DL, VT);
813 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
814 }
815
816 // select c, ~x, x --> xor -c, x
817 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
818 const APInt &TrueVal = TrueV->getAsAPIntVal();
819 const APInt &FalseVal = FalseV->getAsAPIntVal();
820 if (~TrueVal == FalseVal) {
821 SDValue Neg = DAG.getNegative(CondV, DL, VT);
822 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
823 }
824 }
825
826 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
827 // when both truev and falsev are also setcc.
828 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
829 FalseV.getOpcode() == ISD::SETCC) {
830 SDValue LHS = CondV.getOperand(0);
831 SDValue RHS = CondV.getOperand(1);
832 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
833
834 // (select x, x, y) -> x | y
835 // (select !x, x, y) -> x & y
836 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
837 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
838 DAG.getFreeze(FalseV));
839 }
840 // (select x, y, x) -> x & y
841 // (select !x, y, x) -> x | y
842 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
843 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
844 DAG.getFreeze(TrueV), FalseV);
845 }
846 }
847
848 return SDValue();
849}
850
851// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
852// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
853// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
854// being `0` or `-1`. In such cases we can replace `select` with `and`.
855// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
856// than `c0`?
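// Illustrative sketch of the fold described above (one possible case):
//   (and (select cond, x, 0), 1)  -->  (select cond, (and x, 1), 0)
// Here binOp(c0, c1) = and(0, 1) = 0, so the new select has a zero arm and can
// later be turned into a plain mask by combineSelectToBinOp.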
857static SDValue
859 const LoongArchSubtarget &Subtarget) {
860 unsigned SelOpNo = 0;
861 SDValue Sel = BO->getOperand(0);
862 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
863 SelOpNo = 1;
864 Sel = BO->getOperand(1);
865 }
866
867 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
868 return SDValue();
869
870 unsigned ConstSelOpNo = 1;
871 unsigned OtherSelOpNo = 2;
872 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
873 ConstSelOpNo = 2;
874 OtherSelOpNo = 1;
875 }
876 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
877 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
878 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
879 return SDValue();
880
881 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
882 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
883 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
884 return SDValue();
885
886 SDLoc DL(Sel);
887 EVT VT = BO->getValueType(0);
888
889 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
890 if (SelOpNo == 1)
891 std::swap(NewConstOps[0], NewConstOps[1]);
892
893 SDValue NewConstOp =
894 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
895 if (!NewConstOp)
896 return SDValue();
897
898 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
899 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
900 return SDValue();
901
902 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
903 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
904 if (SelOpNo == 1)
905 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
906 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
907
908 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
909 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
910 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
911}
912
913// Changes the condition code and swaps operands if necessary, so the SetCC
914// operation matches one of the comparisons supported directly by branches
915// in the LoongArch ISA. May adjust compares to favor compare with 0 over
916// compare with 1/-1.
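// Illustrative rewrites (assumed, derived from the rules below):
//   (setcc (and %x, 0x8000), 0, seteq) -> (setcc (shl %x, GRLen - 16), 0, setge)
//     (a single-bit test whose mask does not fit in ANDI)
//   (setcc %x, -1, setgt)              -> (setcc %x, 0, setge)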
918 ISD::CondCode &CC, SelectionDAG &DAG) {
919 // If this is a single bit test that can't be handled by ANDI, shift the
920 // bit to be tested to the MSB and perform a signed compare with 0.
921 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
922 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
923 isa<ConstantSDNode>(LHS.getOperand(1))) {
924 uint64_t Mask = LHS.getConstantOperandVal(1);
925 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
926 unsigned ShAmt = 0;
927 if (isPowerOf2_64(Mask)) {
928 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
929 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
930 } else {
931 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
932 }
933
934 LHS = LHS.getOperand(0);
935 if (ShAmt != 0)
936 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
937 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
938 return;
939 }
940 }
941
942 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
943 int64_t C = RHSC->getSExtValue();
944 switch (CC) {
945 default:
946 break;
947 case ISD::SETGT:
948 // Convert X > -1 to X >= 0.
949 if (C == -1) {
950 RHS = DAG.getConstant(0, DL, RHS.getValueType());
951 CC = ISD::SETGE;
952 return;
953 }
954 break;
955 case ISD::SETLT:
956 // Convert X < 1 to 0 >= X.
957 if (C == 1) {
958 RHS = LHS;
959 LHS = DAG.getConstant(0, DL, RHS.getValueType());
960 CC = ISD::SETGE;
961 return;
962 }
963 break;
964 }
965 }
966
967 switch (CC) {
968 default:
969 break;
970 case ISD::SETGT:
971 case ISD::SETLE:
972 case ISD::SETUGT:
973 case ISD::SETULE:
975 std::swap(LHS, RHS);
976 break;
977 }
978}
979
980SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
981 SelectionDAG &DAG) const {
982 SDValue CondV = Op.getOperand(0);
983 SDValue TrueV = Op.getOperand(1);
984 SDValue FalseV = Op.getOperand(2);
985 SDLoc DL(Op);
986 MVT VT = Op.getSimpleValueType();
987 MVT GRLenVT = Subtarget.getGRLenVT();
988
989 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
990 return V;
991
992 if (Op.hasOneUse()) {
993 unsigned UseOpc = Op->user_begin()->getOpcode();
994 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
995 SDNode *BinOp = *Op->user_begin();
996 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
997 DAG, Subtarget)) {
998 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
999 // The opcode check is necessary because foldBinOpIntoSelectIfProfitable
1000 // may return a constant node, which would crash lowerSELECT.
1001 if (NewSel.getOpcode() == ISD::SELECT)
1002 return lowerSELECT(NewSel, DAG);
1003 return NewSel;
1004 }
1005 }
1006 }
1007
1008 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1009 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1010 // (select condv, truev, falsev)
1011 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1012 if (CondV.getOpcode() != ISD::SETCC ||
1013 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1014 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1015 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1016
1017 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1018
1019 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1020 }
1021
1022 // If the CondV is the output of a SETCC node which operates on GRLenVT
1023 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1024 // to take advantage of the integer compare+branch instructions. i.e.: (select
1025 // (setcc lhs, rhs, cc), truev, falsev)
1026 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1027 SDValue LHS = CondV.getOperand(0);
1028 SDValue RHS = CondV.getOperand(1);
1029 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1030
1031 // Special case for a select of 2 constants that have a difference of 1.
1032 // Normally this is done by DAGCombine, but if the select is introduced by
1033 // type legalization or op legalization, we miss it. Restricting to SETLT
1034 // case for now because that is what signed saturating add/sub need.
1035 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1036 // but we would probably want to swap the true/false values if the condition
1037 // is SETGE/SETLE to avoid an XORI.
1038 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1039 CCVal == ISD::SETLT) {
1040 const APInt &TrueVal = TrueV->getAsAPIntVal();
1041 const APInt &FalseVal = FalseV->getAsAPIntVal();
1042 if (TrueVal - 1 == FalseVal)
1043 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1044 if (TrueVal + 1 == FalseVal)
1045 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1046 }
1047
1048 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1049 // 1 < x ? x : 1 -> 0 < x ? x : 1
1050 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1051 RHS == TrueV && LHS == FalseV) {
1052 LHS = DAG.getConstant(0, DL, VT);
1053 // 0 <u x is the same as x != 0.
1054 if (CCVal == ISD::SETULT) {
1055 std::swap(LHS, RHS);
1056 CCVal = ISD::SETNE;
1057 }
1058 }
1059
1060 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1061 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1062 RHS == FalseV) {
1063 RHS = DAG.getConstant(0, DL, VT);
1064 }
1065
1066 SDValue TargetCC = DAG.getCondCode(CCVal);
1067
1068 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1069 // (select (setcc lhs, rhs, CC), constant, falsev)
1070 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1071 std::swap(TrueV, FalseV);
1072 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1073 }
1074
1075 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1076 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1077}
1078
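// Illustrative sketch (assumed): (brcond (setcc %a, %b, setlt), %bb) with
// GRLen-typed operands becomes (loongarchisd::br_cc %a, %b, setlt, %bb); a
// non-setcc condition %c falls back to (loongarchisd::br_cc %c, 0, setne, %bb).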
1079SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1080 SelectionDAG &DAG) const {
1081 SDValue CondV = Op.getOperand(1);
1082 SDLoc DL(Op);
1083 MVT GRLenVT = Subtarget.getGRLenVT();
1084
1085 if (CondV.getOpcode() == ISD::SETCC) {
1086 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1087 SDValue LHS = CondV.getOperand(0);
1088 SDValue RHS = CondV.getOperand(1);
1089 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1090
1091 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1092
1093 SDValue TargetCC = DAG.getCondCode(CCVal);
1094 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1095 Op.getOperand(0), LHS, RHS, TargetCC,
1096 Op.getOperand(2));
1097 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1098 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1099 Op.getOperand(0), CondV, Op.getOperand(2));
1100 }
1101 }
1102
1103 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1104 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1105 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1106}
1107
1108SDValue
1109LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1110 SelectionDAG &DAG) const {
1111 SDLoc DL(Op);
1112 MVT OpVT = Op.getSimpleValueType();
1113
1114 SDValue Vector = DAG.getUNDEF(OpVT);
1115 SDValue Val = Op.getOperand(0);
1116 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1117
1118 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1119}
1120
1121SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1122 SelectionDAG &DAG) const {
1123 EVT ResTy = Op->getValueType(0);
1124 SDValue Src = Op->getOperand(0);
1125 SDLoc DL(Op);
1126
1127 // LoongArchISD::BITREV_8B is not supported on LA32.
1128 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1129 return SDValue();
1130
1131 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1132 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1133 unsigned int NewEltNum = NewVT.getVectorNumElements();
1134
1135 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1136
1138 for (unsigned int i = 0; i < NewEltNum; i++) {
1139 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1140 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1141 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1142 ? (unsigned)LoongArchISD::BITREV_8B
1143 : (unsigned)ISD::BITREVERSE;
1144 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1145 }
1146 SDValue Res =
1147 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1148
1149 switch (ResTy.getSimpleVT().SimpleTy) {
1150 default:
1151 return SDValue();
1152 case MVT::v16i8:
1153 case MVT::v32i8:
1154 return Res;
1155 case MVT::v8i16:
1156 case MVT::v16i16:
1157 case MVT::v4i32:
1158 case MVT::v8i32: {
1160 for (unsigned int i = 0; i < NewEltNum; i++)
1161 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1162 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1163 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1164 }
1165 }
1166}
1167
1168// Widen element type to get a new mask value (if possible).
1169// For example:
1170// shufflevector <4 x i32> %a, <4 x i32> %b,
1171// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1172// is equivalent to:
1173// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1174// can be lowered to:
1175// VPACKOD_D vr0, vr0, vr1
1177 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1178 unsigned EltBits = VT.getScalarSizeInBits();
1179
1180 if (EltBits > 32 || EltBits == 1)
1181 return SDValue();
1182
1183 SmallVector<int, 8> NewMask;
1184 if (widenShuffleMaskElts(Mask, NewMask)) {
1185 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1186 : MVT::getIntegerVT(EltBits * 2);
1187 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1188 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1189 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1190 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1191 return DAG.getBitcast(
1192 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1193 }
1194 }
1195
1196 return SDValue();
1197}
1198
1199/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1200/// instructions.
1201// The function matches elements from one of the input vectors shuffled to the
1202// left or right with zeroable elements 'shifted in'. It handles both the
1203// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1204// lane.
1205// Mostly copied from X86.
1206static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1207 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1208 int MaskOffset, const APInt &Zeroable) {
1209 int Size = Mask.size();
1210 unsigned SizeInBits = Size * ScalarSizeInBits;
1211
1212 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1213 for (int i = 0; i < Size; i += Scale)
1214 for (int j = 0; j < Shift; ++j)
1215 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1216 return false;
1217
1218 return true;
1219 };
1220
1221 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1222 int Step = 1) {
1223 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1224 if (!(Mask[i] == -1 || Mask[i] == Low))
1225 return false;
1226 return true;
1227 };
1228
1229 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1230 for (int i = 0; i != Size; i += Scale) {
1231 unsigned Pos = Left ? i + Shift : i;
1232 unsigned Low = Left ? i : i + Shift;
1233 unsigned Len = Scale - Shift;
1234 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1235 return -1;
1236 }
1237
1238 int ShiftEltBits = ScalarSizeInBits * Scale;
1239 bool ByteShift = ShiftEltBits > 64;
1240 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1241 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1242 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1243
1244 // Normalize the scale for byte shifts to still produce an i64 element
1245 // type.
1246 Scale = ByteShift ? Scale / 2 : Scale;
1247
1248 // We need to round trip through the appropriate type for the shift.
1249 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1250 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1251 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1252 return (int)ShiftAmt;
1253 };
1254
1255 unsigned MaxWidth = 128;
1256 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1257 for (int Shift = 1; Shift != Scale; ++Shift)
1258 for (bool Left : {true, false})
1259 if (CheckZeros(Shift, Scale, Left)) {
1260 int ShiftAmt = MatchShift(Shift, Scale, Left);
1261 if (0 < ShiftAmt)
1262 return ShiftAmt;
1263 }
1264
1265 // no match
1266 return -1;
1267}
1268
1269/// Lower VECTOR_SHUFFLE as shift (if possible).
1270///
1271/// For example:
1272/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1273/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1274/// is lowered to:
1275/// (VBSLL_V $v0, $v0, 4)
1276///
1277/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1278/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1279/// is lowered to:
1280/// (VSLLI_D $v0, $v0, 32)
1282 MVT VT, SDValue V1, SDValue V2,
1283 SelectionDAG &DAG,
1284 const LoongArchSubtarget &Subtarget,
1285 const APInt &Zeroable) {
1286 int Size = Mask.size();
1287 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1288
1289 MVT ShiftVT;
1290 SDValue V = V1;
1291 unsigned Opcode;
1292
1293 // Try to match shuffle against V1 shift.
1294 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1295 Mask, 0, Zeroable);
1296
1297 // If V1 failed, try to match shuffle against V2 shift.
1298 if (ShiftAmt < 0) {
1299 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1300 Mask, Size, Zeroable);
1301 V = V2;
1302 }
1303
1304 if (ShiftAmt < 0)
1305 return SDValue();
1306
1307 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1308 "Illegal integer vector type");
1309 V = DAG.getBitcast(ShiftVT, V);
1310 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1311 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1312 return DAG.getBitcast(VT, V);
1313}
1314
1315/// Determine whether a range fits a regular pattern of values.
1316/// This function accounts for the possibility of jumping over the End iterator.
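/// For illustration (hypothetical call): with Mask = <0, 4, 2, 6>,
/// fitsRegularPattern<int>(Mask.begin(), 2, Mask.end(), 0, 2) checks positions
/// 0 and 2 against the sequence 0, 2 and returns true; starting from
/// Mask.begin() + 1 with ExpectedIndex = 4 checks positions 1 and 3 against
/// 4, 6 and also returns true.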
1317template <typename ValType>
1318static bool
1320 unsigned CheckStride,
1322 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1323 auto &I = Begin;
1324
1325 while (I != End) {
1326 if (*I != -1 && *I != ExpectedIndex)
1327 return false;
1328 ExpectedIndex += ExpectedIndexStride;
1329
1330 // Incrementing past End is undefined behaviour so we must increment one
1331 // step at a time and check for End at each step.
1332 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1333 ; // Empty loop body.
1334 }
1335 return true;
1336}
1337
1338/// Compute whether each element of a shuffle is zeroable.
1339///
1340/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1342 SDValue V2, APInt &KnownUndef,
1343 APInt &KnownZero) {
1344 int Size = Mask.size();
1345 KnownUndef = KnownZero = APInt::getZero(Size);
1346
1347 V1 = peekThroughBitcasts(V1);
1348 V2 = peekThroughBitcasts(V2);
1349
1350 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1351 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1352
1353 int VectorSizeInBits = V1.getValueSizeInBits();
1354 int ScalarSizeInBits = VectorSizeInBits / Size;
1355 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1356 (void)ScalarSizeInBits;
1357
1358 for (int i = 0; i < Size; ++i) {
1359 int M = Mask[i];
1360 if (M < 0) {
1361 KnownUndef.setBit(i);
1362 continue;
1363 }
1364 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1365 KnownZero.setBit(i);
1366 continue;
1367 }
1368 }
1369}
1370
1371/// Test whether a shuffle mask is equivalent within each sub-lane.
1372///
1373/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1374/// non-trivial to compute in the face of undef lanes. The representation is
1375/// suitable for use with existing 128-bit shuffles as entries from the second
1376/// vector have been remapped to [LaneSize, 2*LaneSize).
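/// For illustration (assumed v8i32 shuffle with 128-bit lanes): the mask
/// <0, 1, 8, 9, 4, 5, 12, 13> repeats per lane and yields
/// RepeatedMask = <0, 1, 4, 5>, whereas <0, 4, 1, 5, 2, 6, 3, 7> mixes lanes
/// and is rejected.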
1377static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1378 ArrayRef<int> Mask,
1379 SmallVectorImpl<int> &RepeatedMask) {
1380 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1381 RepeatedMask.assign(LaneSize, -1);
1382 int Size = Mask.size();
1383 for (int i = 0; i < Size; ++i) {
1384 assert(Mask[i] == -1 || Mask[i] >= 0);
1385 if (Mask[i] < 0)
1386 continue;
1387 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1388 // This entry crosses lanes, so there is no way to model this shuffle.
1389 return false;
1390
1391 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1392 // Adjust second vector indices to start at LaneSize instead of Size.
1393 int LocalM =
1394 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1395 if (RepeatedMask[i % LaneSize] < 0)
1396 // This is the first non-undef entry in this slot of a 128-bit lane.
1397 RepeatedMask[i % LaneSize] = LocalM;
1398 else if (RepeatedMask[i % LaneSize] != LocalM)
1399 // Found a mismatch with the repeated mask.
1400 return false;
1401 }
1402 return true;
1403}
1404
1405/// Attempts to match vector shuffle as byte rotation.
1407 ArrayRef<int> Mask) {
1408
1409 SDValue Lo, Hi;
1410 SmallVector<int, 16> RepeatedMask;
1411
1412 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1413 return -1;
1414
1415 int NumElts = RepeatedMask.size();
1416 int Rotation = 0;
1417 int Scale = 16 / NumElts;
1418
1419 for (int i = 0; i < NumElts; ++i) {
1420 int M = RepeatedMask[i];
1421 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1422 "Unexpected mask index.");
1423 if (M < 0)
1424 continue;
1425
1426 // Determine where a rotated vector would have started.
1427 int StartIdx = i - (M % NumElts);
1428 if (StartIdx == 0)
1429 return -1;
1430
1431 // If we found the tail of a vector the rotation must be the missing
1432 // front. If we found the head of a vector, it must be how much of the
1433 // head.
1434 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1435
1436 if (Rotation == 0)
1437 Rotation = CandidateRotation;
1438 else if (Rotation != CandidateRotation)
1439 return -1;
1440
1441 // Compute which value this mask is pointing at.
1442 SDValue MaskV = M < NumElts ? V1 : V2;
1443
1444 // Compute which of the two target values this index should be assigned
1445 // to. This reflects whether the high elements are remaining or the low
1446 // elements are remaining.
1447 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1448
1449 // Either set up this value if we've not encountered it before, or check
1450 // that it remains consistent.
1451 if (!TargetV)
1452 TargetV = MaskV;
1453 else if (TargetV != MaskV)
1454 return -1;
1455 }
1456
1457 // Check that we successfully analyzed the mask, and normalize the results.
1458 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1459 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1460 if (!Lo)
1461 Lo = Hi;
1462 else if (!Hi)
1463 Hi = Lo;
1464
1465 V1 = Lo;
1466 V2 = Hi;
1467
1468 return Rotation * Scale;
1469}
1470
1471/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1472///
1473/// For example:
1474/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1475/// <2 x i32> <i32 3, i32 0>
1476/// is lowered to:
1477/// (VBSRL_V $v1, $v1, 8)
1478/// (VBSLL_V $v0, $v0, 8)
1479/// (VOR_V $v0, $V0, $v1)
1480static SDValue
1482 SDValue V1, SDValue V2, SelectionDAG &DAG,
1483 const LoongArchSubtarget &Subtarget) {
1484
1485 SDValue Lo = V1, Hi = V2;
1486 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1487 if (ByteRotation <= 0)
1488 return SDValue();
1489
1490 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1491 Lo = DAG.getBitcast(ByteVT, Lo);
1492 Hi = DAG.getBitcast(ByteVT, Hi);
1493
1494 int LoByteShift = 16 - ByteRotation;
1495 int HiByteShift = ByteRotation;
1496 MVT GRLenVT = Subtarget.getGRLenVT();
1497
1498 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1499 DAG.getConstant(LoByteShift, DL, GRLenVT));
1500 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1501 DAG.getConstant(HiByteShift, DL, GRLenVT));
1502 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1503}
1504
1505/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1506///
1507/// For example:
1508/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1509/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1510/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1511/// is lowered to:
1512/// (VREPLI $v1, 0)
1513/// (VILVL $v0, $v1, $v0)
1515 ArrayRef<int> Mask, MVT VT,
1516 SDValue V1, SDValue V2,
1517 SelectionDAG &DAG,
1518 const APInt &Zeroable) {
1519 int Bits = VT.getSizeInBits();
1520 int EltBits = VT.getScalarSizeInBits();
1521 int NumElements = VT.getVectorNumElements();
1522
1523 if (Zeroable.isAllOnes())
1524 return DAG.getConstant(0, DL, VT);
1525
1526 // Define a helper function to check a particular ext-scale and lower to it if
1527 // valid.
1528 auto Lower = [&](int Scale) -> SDValue {
1529 SDValue InputV;
1530 bool AnyExt = true;
1531 int Offset = 0;
1532 for (int i = 0; i < NumElements; i++) {
1533 int M = Mask[i];
1534 if (M < 0)
1535 continue;
1536 if (i % Scale != 0) {
1537 // Each of the extended elements need to be zeroable.
1538 if (!Zeroable[i])
1539 return SDValue();
1540
1541 AnyExt = false;
1542 continue;
1543 }
1544
1545 // Each of the base elements needs to be consecutive indices into the
1546 // same input vector.
1547 SDValue V = M < NumElements ? V1 : V2;
1548 M = M % NumElements;
1549 if (!InputV) {
1550 InputV = V;
1551 Offset = M - (i / Scale);
1552
1553 // These offsets can't be handled.
1554 if (Offset % (NumElements / Scale))
1555 return SDValue();
1556 } else if (InputV != V)
1557 return SDValue();
1558
1559 if (M != (Offset + (i / Scale)))
1560 return SDValue(); // Non-consecutive strided elements.
1561 }
1562
1563 // If we fail to find an input, we have a zero-shuffle which should always
1564 // have already been handled.
1565 if (!InputV)
1566 return SDValue();
1567
1568 do {
1569 unsigned VilVLoHi = LoongArchISD::VILVL;
1570 if (Offset >= (NumElements / 2)) {
1571 VilVLoHi = LoongArchISD::VILVH;
1572 Offset -= (NumElements / 2);
1573 }
1574
1575 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1576 SDValue Ext =
1577 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1578 InputV = DAG.getBitcast(InputVT, InputV);
1579 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1580 Scale /= 2;
1581 EltBits *= 2;
1582 NumElements /= 2;
1583 } while (Scale > 1);
1584 return DAG.getBitcast(VT, InputV);
1585 };
1586
1587 // Each iteration, try extending the elements half as much, but into twice as
1588 // many elements.
1589 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1590 NumExtElements *= 2) {
1591 if (SDValue V = Lower(NumElements / NumExtElements))
1592 return V;
1593 }
1594 return SDValue();
1595}
1596
1597/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1598///
1599/// VREPLVEI performs vector broadcast based on an element specified by an
1600/// integer immediate, with its mask being similar to:
1601/// <x, x, x, ...>
1602/// where x is any valid index.
1603///
1604/// When undef's appear in the mask they are treated as if they were whatever
1605/// value is necessary in order to fit the above form.
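/// For illustration (assumed lowering): for a <4 x i32> vector %a,
///   shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// broadcasts element 1 and can be selected to vreplvei.w with immediate 1.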
1606static SDValue
1608 SDValue V1, SelectionDAG &DAG,
1609 const LoongArchSubtarget &Subtarget) {
1610 int SplatIndex = -1;
1611 for (const auto &M : Mask) {
1612 if (M != -1) {
1613 SplatIndex = M;
1614 break;
1615 }
1616 }
1617
1618 if (SplatIndex == -1)
1619 return DAG.getUNDEF(VT);
1620
1621 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1622 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1623 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1624 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1625 }
1626
1627 return SDValue();
1628}
1629
1630/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1631///
1632/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1633/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1634///
1635/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1636/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1637/// When undef's appear they are treated as if they were whatever value is
1638/// necessary in order to fit the above forms.
1639///
1640/// For example:
1641/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1642/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1643/// i32 7, i32 6, i32 5, i32 4>
1644/// is lowered to:
1645/// (VSHUF4I_H $v0, $v1, 27)
1646/// where the 27 comes from:
1647/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1648static SDValue
1650 SDValue V1, SDValue V2, SelectionDAG &DAG,
1651 const LoongArchSubtarget &Subtarget) {
1652
1653 unsigned SubVecSize = 4;
1654 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1655 SubVecSize = 2;
1656
1657 int SubMask[4] = {-1, -1, -1, -1};
1658 for (unsigned i = 0; i < SubVecSize; ++i) {
1659 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1660 int M = Mask[j];
1661
1662 // Convert from vector index to 4-element subvector index
1663 // If an index refers to an element outside of the subvector then give up
1664 if (M != -1) {
1665 M -= 4 * (j / SubVecSize);
1666 if (M < 0 || M >= 4)
1667 return SDValue();
1668 }
1669
1670 // If the mask has an undef, replace it with the current index.
1671 // Note that it might still be undef if the current index is also undef
1672 if (SubMask[i] == -1)
1673 SubMask[i] = M;
1674 // Check that non-undef values are the same as in the mask. If they
1675 // aren't then give up
1676 else if (M != -1 && M != SubMask[i])
1677 return SDValue();
1678 }
1679 }
1680
1681 // Calculate the immediate. Replace any remaining undefs with zero
1682 int Imm = 0;
1683 for (int i = SubVecSize - 1; i >= 0; --i) {
1684 int M = SubMask[i];
1685
1686 if (M == -1)
1687 M = 0;
1688
1689 Imm <<= 2;
1690 Imm |= M & 0x3;
1691 }
1692
1693 MVT GRLenVT = Subtarget.getGRLenVT();
1694
1695 // Return vshuf4i.d
1696 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1697 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1698 DAG.getConstant(Imm, DL, GRLenVT));
1699
1700 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1701 DAG.getConstant(Imm, DL, GRLenVT));
1702}
1703
1704/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1705///
1706/// VPACKEV interleaves the even elements from each vector.
1707///
1708/// It is possible to lower into VPACKEV when the mask consists of two of the
1709/// following forms interleaved:
1710/// <0, 2, 4, ...>
1711/// <n, n+2, n+4, ...>
1712/// where n is the number of elements in the vector.
1713/// For example:
1714/// <0, 0, 2, 2, 4, 4, ...>
1715/// <0, n, 2, n+2, 4, n+4, ...>
1716///
1717/// When undef's appear in the mask they are treated as if they were whatever
1718/// value is necessary in order to fit the above forms.
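/// For illustration (assumed lowering): for <4 x i32> vectors %a and %b,
///   shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
/// interleaves <0, 2, ...> with <n, n+2, ...> (n = 4) and can be selected to
/// vpackev.w.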
1720 MVT VT, SDValue V1, SDValue V2,
1721 SelectionDAG &DAG) {
1722
1723 const auto &Begin = Mask.begin();
1724 const auto &End = Mask.end();
1725 SDValue OriV1 = V1, OriV2 = V2;
1726
1727 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1728 V1 = OriV1;
1729 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1730 V1 = OriV2;
1731 else
1732 return SDValue();
1733
1734 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1735 V2 = OriV1;
1736 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1737 V2 = OriV2;
1738 else
1739 return SDValue();
1740
1741 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1742}
1743
1744/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1745///
1746/// VPACKOD interleaves the odd elements from each vector.
1747///
1748/// It is possible to lower into VPACKOD when the mask consists of two of the
1749/// following forms interleaved:
1750/// <1, 3, 5, ...>
1751/// <n+1, n+3, n+5, ...>
1752/// where n is the number of elements in the vector.
1753/// For example:
1754/// <1, 1, 3, 3, 5, 5, ...>
1755/// <1, n+1, 3, n+3, 5, n+5, ...>
1756///
1757/// When undef's appear in the mask they are treated as if they were whatever
1758/// value is necessary in order to fit the above forms.
1760 MVT VT, SDValue V1, SDValue V2,
1761 SelectionDAG &DAG) {
1762
1763 const auto &Begin = Mask.begin();
1764 const auto &End = Mask.end();
1765 SDValue OriV1 = V1, OriV2 = V2;
1766
1767 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1768 V1 = OriV1;
1769 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1770 V1 = OriV2;
1771 else
1772 return SDValue();
1773
1774 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1775 V2 = OriV1;
1776 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1777 V2 = OriV2;
1778 else
1779 return SDValue();
1780
1781 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1782}
1783
1784/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1785///
1786/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1787/// of each vector.
1788///
1789/// It is possible to lower into VILVH when the mask consists of two of the
1790/// following forms interleaved:
1791/// <x, x+1, x+2, ...>
1792/// <n+x, n+x+1, n+x+2, ...>
1793/// where n is the number of elements in the vector and x is half n.
1794/// For example:
1795/// <x, x, x+1, x+1, x+2, x+2, ...>
1796/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1797///
1798/// When undef's appear in the mask they are treated as if they were whatever
1799/// value is necessary in order to fit the above forms.
1801 MVT VT, SDValue V1, SDValue V2,
1802 SelectionDAG &DAG) {
1803
1804 const auto &Begin = Mask.begin();
1805 const auto &End = Mask.end();
1806 unsigned HalfSize = Mask.size() / 2;
1807 SDValue OriV1 = V1, OriV2 = V2;
1808
1809 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1810 V1 = OriV1;
1811 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1812 V1 = OriV2;
1813 else
1814 return SDValue();
1815
1816 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1817 V2 = OriV1;
1818 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1819 1))
1820 V2 = OriV2;
1821 else
1822 return SDValue();
1823
1824 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1825}
1826
1827/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1828///
1829/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1830/// of each vector.
1831///
1832/// It is possible to lower into VILVL when the mask consists of two of the
1833/// following forms interleaved:
1834/// <0, 1, 2, ...>
1835/// <n, n+1, n+2, ...>
1836/// where n is the number of elements in the vector.
1837/// For example:
1838/// <0, 0, 1, 1, 2, 2, ...>
1839/// <0, n, 1, n+1, 2, n+2, ...>
1840///
1841/// When undef's appear in the mask they are treated as if they were whatever
1842/// value is necessary in order to fit the above forms.
1844 MVT VT, SDValue V1, SDValue V2,
1845 SelectionDAG &DAG) {
1846
1847 const auto &Begin = Mask.begin();
1848 const auto &End = Mask.end();
1849 SDValue OriV1 = V1, OriV2 = V2;
1850
1851 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1852 V1 = OriV1;
1853 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1854 V1 = OriV2;
1855 else
1856 return SDValue();
1857
1858 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1859 V2 = OriV1;
1860 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1861 V2 = OriV2;
1862 else
1863 return SDValue();
1864
1865 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1866}
1867
1868/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1869///
1870/// VPICKEV copies the even elements of each vector into the result vector.
1871///
1872/// It is possible to lower into VPICKEV when the mask consists of two of the
1873/// following forms concatenated:
1874/// <0, 2, 4, ...>
1875/// <n, n+2, n+4, ...>
1876/// where n is the number of elements in the vector.
1877/// For example:
1878/// <0, 2, 4, ..., 0, 2, 4, ...>
1879/// <0, 2, 4, ..., n, n+2, n+4, ...>
1880///
1881/// When undef's appear in the mask they are treated as if they were whatever
1882/// value is necessary in order to fit the above forms.
1883 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1884 MVT VT, SDValue V1, SDValue V2,
1885 SelectionDAG &DAG) {
1886
1887 const auto &Begin = Mask.begin();
1888 const auto &Mid = Mask.begin() + Mask.size() / 2;
1889 const auto &End = Mask.end();
1890 SDValue OriV1 = V1, OriV2 = V2;
1891
1892 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1893 V1 = OriV1;
1894 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1895 V1 = OriV2;
1896 else
1897 return SDValue();
1898
1899 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1900 V2 = OriV1;
1901 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1902 V2 = OriV2;
1903
1904 else
1905 return SDValue();
1906
1907 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1908}
1909
1910/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1911///
1912/// VPICKOD copies the odd elements of each vector into the result vector.
1913///
1914/// It is possible to lower into VPICKOD when the mask consists of two of the
1915/// following forms concatenated:
1916/// <1, 3, 5, ...>
1917/// <n+1, n+3, n+5, ...>
1918/// where n is the number of elements in the vector.
1919/// For example:
1920/// <1, 3, 5, ..., 1, 3, 5, ...>
1921/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1922///
1923/// When undef's appear in the mask they are treated as if they were whatever
1924/// value is necessary in order to fit the above forms.
1925 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1926 MVT VT, SDValue V1, SDValue V2,
1927 SelectionDAG &DAG) {
1928
1929 const auto &Begin = Mask.begin();
1930 const auto &Mid = Mask.begin() + Mask.size() / 2;
1931 const auto &End = Mask.end();
1932 SDValue OriV1 = V1, OriV2 = V2;
1933
1934 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1935 V1 = OriV1;
1936 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1937 V1 = OriV2;
1938 else
1939 return SDValue();
1940
1941 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1942 V2 = OriV1;
1943 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1944 V2 = OriV2;
1945 else
1946 return SDValue();
1947
1948 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1949}
1950
1951/// Lower VECTOR_SHUFFLE into VSHUF.
1952///
1953/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1954/// adding it as an operand to the resulting VSHUF.
1955 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1956 MVT VT, SDValue V1, SDValue V2,
1957 SelectionDAG &DAG,
1958 const LoongArchSubtarget &Subtarget) {
1959
1960 SmallVector<SDValue, 16> Ops;
1961 for (auto M : Mask)
1962 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
1963
1964 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1965 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1966
1967 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1968 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1969 // VSHUF concatenates the vectors in a bitwise fashion:
1970 // <0b00, 0b01> + <0b10, 0b11> ->
1971 // 0b0100 + 0b1110 -> 0b01001110
1972 // <0b10, 0b11, 0b00, 0b01>
1973 // We must therefore swap the operands to get the correct result.
1974 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1975}
1976
1977/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1978///
1979/// This routine breaks down the specific type of 128-bit shuffle and
1980/// dispatches to the lowering routines accordingly.
1981 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1982 SDValue V1, SDValue V2, SelectionDAG &DAG,
1983 const LoongArchSubtarget &Subtarget) {
1984 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1985 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1986 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1987 "Vector type is unsupported for lsx!");
1988 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1989 "Two operands have different types!");
1990 assert(VT.getVectorNumElements() == Mask.size() &&
1991 "Unexpected mask size for shuffle!");
1992 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1993
1994 APInt KnownUndef, KnownZero;
1995 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1996 APInt Zeroable = KnownUndef | KnownZero;
1997
1998 SDValue Result;
1999 // TODO: Add more comparison patterns.
2000 if (V2.isUndef()) {
2001 if ((Result =
2002 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2003 return Result;
2004 if ((Result =
2005 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2006 return Result;
2007
2008 // TODO: This comment may be enabled in the future to better match the
2009 // pattern for instruction selection.
2010 /* V2 = V1; */
2011 }
2012
2013 // It is recommended not to change the pattern comparison order for better
2014 // performance.
2015 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2016 return Result;
2017 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2018 return Result;
2019 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2020 return Result;
2021 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2022 return Result;
2023 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2024 return Result;
2025 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2026 return Result;
2027 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2028 (Result =
2029 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2030 return Result;
2031 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2032 Zeroable)))
2033 return Result;
2034 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2035 Zeroable)))
2036 return Result;
2037 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2038 Subtarget)))
2039 return Result;
2040 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2041 return NewShuffle;
2042 if ((Result =
2043 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2044 return Result;
2045 return SDValue();
2046}
2047
2048/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2049///
2050 /// It is an XVREPLVEI when the mask is:
2051 /// <x, x, x, ..., x+n, x+n, x+n, ...>
2052 /// where x appears n times and n is half the length of the vector.
2053///
2054/// When undef's appear in the mask they are treated as if they were whatever
2055/// value is necessary in order to fit the above form.
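/// For example, with v8i32 (so n = 4), the mask <2, 2, 2, 2, 6, 6, 6, 6>
/// fits this form with x = 2.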
2056static SDValue
2057 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2058 SDValue V1, SelectionDAG &DAG,
2059 const LoongArchSubtarget &Subtarget) {
2060 int SplatIndex = -1;
2061 for (const auto &M : Mask) {
2062 if (M != -1) {
2063 SplatIndex = M;
2064 break;
2065 }
2066 }
2067
2068 if (SplatIndex == -1)
2069 return DAG.getUNDEF(VT);
2070
2071 const auto &Begin = Mask.begin();
2072 const auto &End = Mask.end();
2073 int HalfSize = Mask.size() / 2;
2074
2075 if (SplatIndex >= HalfSize)
2076 return SDValue();
2077
2078 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2079 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2080 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2081 0)) {
2082 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2083 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2084 }
2085
2086 return SDValue();
2087}
2088
2089/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2090static SDValue
2091 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2092 SDValue V1, SDValue V2, SelectionDAG &DAG,
2093 const LoongArchSubtarget &Subtarget) {
2094 // When the size is less than or equal to 4, lower cost instructions may be
2095 // used.
2096 if (Mask.size() <= 4)
2097 return SDValue();
2098 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2099}
2100
2101/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2102static SDValue
2103 lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2104 SDValue V1, SelectionDAG &DAG,
2105 const LoongArchSubtarget &Subtarget) {
2106 // Only consider XVPERMI_D.
2107 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2108 return SDValue();
2109
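  // Pack the 4-element mask into an 8-bit immediate, two bits per element,
  // with element i occupying bits [2*i+1 : 2*i]. For instance, the mask
  // <1, 0, 3, 2> packs to 0b10110001 (0xb1).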
2110 unsigned MaskImm = 0;
2111 for (unsigned i = 0; i < Mask.size(); ++i) {
2112 if (Mask[i] == -1)
2113 continue;
2114 MaskImm |= Mask[i] << (i * 2);
2115 }
2116
2117 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2118 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2119}
2120
2121/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2122 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2123 MVT VT, SDValue V1, SelectionDAG &DAG,
2124 const LoongArchSubtarget &Subtarget) {
2125 // LoongArch LASX only has XVPERM_W.
2126 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2127 return SDValue();
2128
2129 unsigned NumElts = VT.getVectorNumElements();
2130 unsigned HalfSize = NumElts / 2;
2131 bool FrontLo = true, FrontHi = true;
2132 bool BackLo = true, BackHi = true;
2133
2134 auto inRange = [](int val, int low, int high) {
2135 return (val == -1) || (val >= low && val < high);
2136 };
2137
2138 for (unsigned i = 0; i < HalfSize; ++i) {
2139 int Fronti = Mask[i];
2140 int Backi = Mask[i + HalfSize];
2141
2142 FrontLo &= inRange(Fronti, 0, HalfSize);
2143 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2144 BackLo &= inRange(Backi, 0, HalfSize);
2145 BackHi &= inRange(Backi, HalfSize, NumElts);
2146 }
2147
2148 // If both the lower and upper 128-bit parts access only one half of the
2149 // vector (either lower or upper), avoid using xvperm.w. The latency of
2150 // xvperm.w (latency 3) is higher than that of xvshuf (1) plus xvori (1).
2151 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2152 return SDValue();
2153
2154 SmallVector<SDValue, 8> Masks;
2155 MVT GRLenVT = Subtarget.getGRLenVT();
2156 for (unsigned i = 0; i < NumElts; ++i)
2157 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2158 : DAG.getConstant(Mask[i], DL, GRLenVT));
2159 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2160
2161 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2162}
2163
2164/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2165 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2166 MVT VT, SDValue V1, SDValue V2,
2167 SelectionDAG &DAG) {
2168 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2169}
2170
2171/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2172 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2173 MVT VT, SDValue V1, SDValue V2,
2174 SelectionDAG &DAG) {
2175 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2176}
2177
2178/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2179 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2180 MVT VT, SDValue V1, SDValue V2,
2181 SelectionDAG &DAG) {
2182
2183 const auto &Begin = Mask.begin();
2184 const auto &End = Mask.end();
2185 unsigned HalfSize = Mask.size() / 2;
2186 unsigned LeftSize = HalfSize / 2;
2187 SDValue OriV1 = V1, OriV2 = V2;
2188
2189 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2190 1) &&
2191 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2192 V1 = OriV1;
2193 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2194 Mask.size() + HalfSize - LeftSize, 1) &&
2195 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2196 Mask.size() + HalfSize + LeftSize, 1))
2197 V1 = OriV2;
2198 else
2199 return SDValue();
2200
2201 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2202 1) &&
2203 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2204 1))
2205 V2 = OriV1;
2206 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2207 Mask.size() + HalfSize - LeftSize, 1) &&
2208 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2209 Mask.size() + HalfSize + LeftSize, 1))
2210 V2 = OriV2;
2211 else
2212 return SDValue();
2213
2214 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2215}
2216
2217/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2218 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2219 MVT VT, SDValue V1, SDValue V2,
2220 SelectionDAG &DAG) {
2221
2222 const auto &Begin = Mask.begin();
2223 const auto &End = Mask.end();
2224 unsigned HalfSize = Mask.size() / 2;
2225 SDValue OriV1 = V1, OriV2 = V2;
2226
2227 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2228 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2229 V1 = OriV1;
2230 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2231 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2232 Mask.size() + HalfSize, 1))
2233 V1 = OriV2;
2234 else
2235 return SDValue();
2236
2237 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2238 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2239 V2 = OriV1;
2240 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2241 1) &&
2242 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2243 Mask.size() + HalfSize, 1))
2244 V2 = OriV2;
2245 else
2246 return SDValue();
2247
2248 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2249}
2250
2251/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2252 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2253 MVT VT, SDValue V1, SDValue V2,
2254 SelectionDAG &DAG) {
2255
2256 const auto &Begin = Mask.begin();
2257 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2258 const auto &Mid = Mask.begin() + Mask.size() / 2;
2259 const auto &RightMid = Mask.end() - Mask.size() / 4;
2260 const auto &End = Mask.end();
2261 unsigned HalfSize = Mask.size() / 2;
2262 SDValue OriV1 = V1, OriV2 = V2;
2263
2264 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2265 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2266 V1 = OriV1;
2267 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2268 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2269 V1 = OriV2;
2270 else
2271 return SDValue();
2272
2273 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2274 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2275 V2 = OriV1;
2276 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2277 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2278 V2 = OriV2;
2279
2280 else
2281 return SDValue();
2282
2283 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2284}
2285
2286/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2287 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2288 MVT VT, SDValue V1, SDValue V2,
2289 SelectionDAG &DAG) {
2290
2291 const auto &Begin = Mask.begin();
2292 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2293 const auto &Mid = Mask.begin() + Mask.size() / 2;
2294 const auto &RightMid = Mask.end() - Mask.size() / 4;
2295 const auto &End = Mask.end();
2296 unsigned HalfSize = Mask.size() / 2;
2297 SDValue OriV1 = V1, OriV2 = V2;
2298
2299 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2300 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2301 V1 = OriV1;
2302 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2303 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2304 2))
2305 V1 = OriV2;
2306 else
2307 return SDValue();
2308
2309 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2310 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2311 V2 = OriV1;
2312 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2313 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2314 2))
2315 V2 = OriV2;
2316 else
2317 return SDValue();
2318
2319 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2320}
2321
2322/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2323static SDValue
2324 lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2325 SDValue V1, SDValue V2, SelectionDAG &DAG,
2326 const LoongArchSubtarget &Subtarget) {
2327 // LoongArch LASX only supports xvinsve0.{w/d}.
2328 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2329 VT != MVT::v4f64)
2330 return SDValue();
2331
2332 MVT GRLenVT = Subtarget.getGRLenVT();
2333 int MaskSize = Mask.size();
2334 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2335
2336 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2337 // all other elements are either 'Base + i' or undef (-1). On success, return
2338 // the index of the replaced element. Otherwise, just return -1.
2339 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2340 int Idx = -1;
2341 for (int i = 0; i < MaskSize; ++i) {
2342 if (Mask[i] == Base + i || Mask[i] == -1)
2343 continue;
2344 if (Mask[i] != Replaced)
2345 return -1;
2346 if (Idx == -1)
2347 Idx = i;
2348 else
2349 return -1;
2350 }
2351 return Idx;
2352 };
2353
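  // For example, with v8i32 and Mask = <0, 1, 2, 8, 4, 5, 6, 7>, element 3 of
  // V1 is replaced by element 0 of V2 (mask value 8 == MaskSize), so
  // checkReplaceOne(0, MaskSize) returns 3 and Case 1 below emits
  // (XVINSVE0 V1, V2, 3).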
2354 // Case 1: the lowest element of V2 replaces one element in V1.
2355 int Idx = checkReplaceOne(0, MaskSize);
2356 if (Idx != -1)
2357 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2358 DAG.getConstant(Idx, DL, GRLenVT));
2359
2360 // Case 2: the lowest element of V1 replaces one element in V2.
2361 Idx = checkReplaceOne(MaskSize, 0);
2362 if (Idx != -1)
2363 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2364 DAG.getConstant(Idx, DL, GRLenVT));
2365
2366 return SDValue();
2367}
2368
2369/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2370 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2371 MVT VT, SDValue V1, SDValue V2,
2372 SelectionDAG &DAG) {
2373
2374 int MaskSize = Mask.size();
2375 int HalfSize = Mask.size() / 2;
2376 const auto &Begin = Mask.begin();
2377 const auto &Mid = Mask.begin() + HalfSize;
2378 const auto &End = Mask.end();
2379
2380 // VECTOR_SHUFFLE concatenates the vectors:
2381 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2382 // shuffling ->
2383 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2384 //
2385 // XVSHUF concatenates the vectors:
2386 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2387 // shuffling ->
2388 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2389 SmallVector<SDValue, 8> MaskAlloc;
2390 for (auto it = Begin; it < Mid; it++) {
2391 if (*it < 0) // UNDEF
2392 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2393 else if ((*it >= 0 && *it < HalfSize) ||
2394 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2395 int M = *it < HalfSize ? *it : *it - HalfSize;
2396 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2397 } else
2398 return SDValue();
2399 }
2400 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2401
2402 for (auto it = Mid; it < End; it++) {
2403 if (*it < 0) // UNDEF
2404 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2405 else if ((*it >= HalfSize && *it < MaskSize) ||
2406 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2407 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2408 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2409 } else
2410 return SDValue();
2411 }
2412 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2413
2414 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2415 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2416 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2417}
2418
2419/// Shuffle vectors by lane to generate more optimized instructions.
2420/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2421///
2422/// Therefore, except for the following four cases, other cases are regarded
2423/// as cross-lane shuffles, where optimization is relatively limited.
2424///
2425 /// - Shuffle high, low lanes of the two input vectors
2426 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2427 /// - Shuffle low, high lanes of the two input vectors
2428 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2429 /// - Shuffle low, low lanes of the two input vectors
2430 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2431 /// - Shuffle high, high lanes of the two input vectors
2432/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2433///
2434/// The first case is the closest to LoongArch instructions and the other
2435/// cases need to be converted to it for processing.
2436///
2437/// This function will return true for the last three cases above and will
2438/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2439/// cross-lane shuffle cases.
2440 static bool canonicalizeShuffleVectorByLane(
2441 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2442 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2443
2444 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2445
2446 int MaskSize = Mask.size();
2447 int HalfSize = Mask.size() / 2;
2448 MVT GRLenVT = Subtarget.getGRLenVT();
2449
2450 HalfMaskType preMask = None, postMask = None;
2451
2452 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2453 return M < 0 || (M >= 0 && M < HalfSize) ||
2454 (M >= MaskSize && M < MaskSize + HalfSize);
2455 }))
2456 preMask = HighLaneTy;
2457 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2458 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2459 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2460 }))
2461 preMask = LowLaneTy;
2462
2463 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2464 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2465 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2466 }))
2467 postMask = LowLaneTy;
2468 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2469 return M < 0 || (M >= 0 && M < HalfSize) ||
2470 (M >= MaskSize && M < MaskSize + HalfSize);
2471 }))
2472 postMask = HighLaneTy;
2473
2474 // The pre-half of mask is high lane type, and the post-half of mask
2475 // is low lane type, which is closest to the LoongArch instructions.
2476 //
2477 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2478 // to the lower 128 bits of the vector register, and the low lane of the mask
2479 // corresponds to the higher 128 bits of the vector register.
2480 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2481 return false;
2482 }
2483 if (preMask == LowLaneTy && postMask == HighLaneTy) {
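    // XVPERMI_D with immediate 0b01001110 selects the 64-bit elements
    // <2, 3, 0, 1>, i.e. it swaps the two 128-bit halves of the register.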
2484 V1 = DAG.getBitcast(MVT::v4i64, V1);
2485 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2486 DAG.getConstant(0b01001110, DL, GRLenVT));
2487 V1 = DAG.getBitcast(VT, V1);
2488
2489 if (!V2.isUndef()) {
2490 V2 = DAG.getBitcast(MVT::v4i64, V2);
2491 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2492 DAG.getConstant(0b01001110, DL, GRLenVT));
2493 V2 = DAG.getBitcast(VT, V2);
2494 }
2495
2496 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2497 *it = *it < 0 ? *it : *it - HalfSize;
2498 }
2499 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2500 *it = *it < 0 ? *it : *it + HalfSize;
2501 }
2502 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
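    // Immediate 0b11101110 selects the 64-bit elements <2, 3, 2, 3>, i.e. it
    // copies the high 128-bit half into both halves.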
2503 V1 = DAG.getBitcast(MVT::v4i64, V1);
2504 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2505 DAG.getConstant(0b11101110, DL, GRLenVT));
2506 V1 = DAG.getBitcast(VT, V1);
2507
2508 if (!V2.isUndef()) {
2509 V2 = DAG.getBitcast(MVT::v4i64, V2);
2510 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2511 DAG.getConstant(0b11101110, DL, GRLenVT));
2512 V2 = DAG.getBitcast(VT, V2);
2513 }
2514
2515 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2516 *it = *it < 0 ? *it : *it - HalfSize;
2517 }
2518 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
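    // Immediate 0b01000100 selects the 64-bit elements <0, 1, 0, 1>, i.e. it
    // copies the low 128-bit half into both halves.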
2519 V1 = DAG.getBitcast(MVT::v4i64, V1);
2520 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2521 DAG.getConstant(0b01000100, DL, GRLenVT));
2522 V1 = DAG.getBitcast(VT, V1);
2523
2524 if (!V2.isUndef()) {
2525 V2 = DAG.getBitcast(MVT::v4i64, V2);
2526 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2527 DAG.getConstant(0b01000100, DL, GRLenVT));
2528 V2 = DAG.getBitcast(VT, V2);
2529 }
2530
2531 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2532 *it = *it < 0 ? *it : *it + HalfSize;
2533 }
2534 } else { // cross-lane
2535 return false;
2536 }
2537
2538 return true;
2539}
2540
2541/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2542/// Only for 256-bit vector.
2543///
2544/// For example:
2545/// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
2546/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2547/// is lowerded to:
2548/// (XVPERMI $xr2, $xr0, 78)
2549/// (XVSHUF $xr1, $xr2, $xr0)
2550/// (XVORI $xr0, $xr1, 0)
2551 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2552 ArrayRef<int> Mask,
2553 MVT VT, SDValue V1,
2554 SDValue V2,
2555 SelectionDAG &DAG) {
2556 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2557 int Size = Mask.size();
2558 int LaneSize = Size / 2;
2559
2560 bool LaneCrossing[2] = {false, false};
2561 for (int i = 0; i < Size; ++i)
2562 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2563 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2564
2565 // If no element crosses the 128-bit lanes, there is nothing to do here.
2566 if (!LaneCrossing[0] && !LaneCrossing[1])
2567 return SDValue();
2568
2569 SmallVector<int> InLaneMask;
2570 InLaneMask.assign(Mask.begin(), Mask.end());
2571 for (int i = 0; i < Size; ++i) {
2572 int &M = InLaneMask[i];
2573 if (M < 0)
2574 continue;
2575 if (((M % Size) / LaneSize) != (i / LaneSize))
2576 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2577 }
2578
2579 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2580 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2581 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2582 Flipped = DAG.getBitcast(VT, Flipped);
2583 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2584}
2585
2586/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2587///
2588/// This routine breaks down the specific type of 256-bit shuffle and
2589/// dispatches to the lowering routines accordingly.
2590 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2591 SDValue V1, SDValue V2, SelectionDAG &DAG,
2592 const LoongArchSubtarget &Subtarget) {
2593 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2594 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2595 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2596 "Vector type is unsupported for lasx!");
2597 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2598 "Two operands have different types!");
2599 assert(VT.getVectorNumElements() == Mask.size() &&
2600 "Unexpected mask size for shuffle!");
2601 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2602 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2603
2604 APInt KnownUndef, KnownZero;
2605 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2606 APInt Zeroable = KnownUndef | KnownZero;
2607
2608 SDValue Result;
2609 // TODO: Add more comparison patterns.
2610 if (V2.isUndef()) {
2611 if ((Result =
2612 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2613 return Result;
2614 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2615 Subtarget)))
2616 return Result;
2617 if ((Result =
2618 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2619 return Result;
2620 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2621 return Result;
2622
2623 // TODO: This comment may be enabled in the future to better match the
2624 // pattern for instruction selection.
2625 /* V2 = V1; */
2626 }
2627
2628 // It is recommended not to change the pattern comparison order for better
2629 // performance.
2630 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2631 return Result;
2632 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2633 return Result;
2634 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2635 return Result;
2636 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2637 return Result;
2638 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2639 return Result;
2640 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2641 return Result;
2642 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2643 Zeroable)))
2644 return Result;
2645 if ((Result =
2646 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2647 return Result;
2648 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2649 Subtarget)))
2650 return Result;
2651
2652 // Canonicalize non-cross-lane shuffle vectors.
2653 SmallVector<int> NewMask(Mask);
2654 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2655 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2656
2657 // FIXME: Handling the remaining cases earlier can degrade performance
2658 // in some situations. Further analysis is required to enable more
2659 // effective optimizations.
2660 if (V2.isUndef()) {
2661 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2662 V1, V2, DAG)))
2663 return Result;
2664 }
2665
2666 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2667 return NewShuffle;
2668 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2669 return Result;
2670
2671 return SDValue();
2672}
2673
2674SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2675 SelectionDAG &DAG) const {
2676 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2677 ArrayRef<int> OrigMask = SVOp->getMask();
2678 SDValue V1 = Op.getOperand(0);
2679 SDValue V2 = Op.getOperand(1);
2680 MVT VT = Op.getSimpleValueType();
2681 int NumElements = VT.getVectorNumElements();
2682 SDLoc DL(Op);
2683
2684 bool V1IsUndef = V1.isUndef();
2685 bool V2IsUndef = V2.isUndef();
2686 if (V1IsUndef && V2IsUndef)
2687 return DAG.getUNDEF(VT);
2688
2689 // When we create a shuffle node we put the UNDEF node to second operand,
2690 // but in some cases the first operand may be transformed to UNDEF.
2691 // In this case we should just commute the node.
2692 if (V1IsUndef)
2693 return DAG.getCommutedVectorShuffle(*SVOp);
2694
2695 // Check for non-undef masks pointing at an undef vector and make the masks
2696 // undef as well. This makes it easier to match the shuffle based solely on
2697 // the mask.
2698 if (V2IsUndef &&
2699 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2700 SmallVector<int, 8> NewMask(OrigMask);
2701 for (int &M : NewMask)
2702 if (M >= NumElements)
2703 M = -1;
2704 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2705 }
2706
2707 // Check for illegal shuffle mask element index values.
2708 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2709 (void)MaskUpperLimit;
2710 assert(llvm::all_of(OrigMask,
2711 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2712 "Out of bounds shuffle index");
2713
2714 // For each vector width, delegate to a specialized lowering routine.
2715 if (VT.is128BitVector())
2716 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2717
2718 if (VT.is256BitVector())
2719 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2720
2721 return SDValue();
2722}
2723
2724SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2725 SelectionDAG &DAG) const {
2726 // Custom lower to ensure the libcall return is passed in an FPR on hard
2727 // float ABIs.
2728 SDLoc DL(Op);
2729 MakeLibCallOptions CallOptions;
2730 SDValue Op0 = Op.getOperand(0);
2731 SDValue Chain = SDValue();
2732 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2733 SDValue Res;
2734 std::tie(Res, Chain) =
2735 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2736 if (Subtarget.is64Bit())
2737 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2738 return DAG.getBitcast(MVT::i32, Res);
2739}
2740
2741SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2742 SelectionDAG &DAG) const {
2743 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2744 // float ABIs.
2745 SDLoc DL(Op);
2746 MakeLibCallOptions CallOptions;
2747 SDValue Op0 = Op.getOperand(0);
2748 SDValue Chain = SDValue();
2749 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2750 DL, MVT::f32, Op0)
2751 : DAG.getBitcast(MVT::f32, Op0);
2752 SDValue Res;
2753 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2754 CallOptions, DL, Chain);
2755 return Res;
2756}
2757
2758SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2759 SelectionDAG &DAG) const {
2760 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2761 SDLoc DL(Op);
2762 MakeLibCallOptions CallOptions;
2763 RTLIB::Libcall LC =
2764 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2765 SDValue Res =
2766 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2767 if (Subtarget.is64Bit())
2768 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2769 return DAG.getBitcast(MVT::i32, Res);
2770}
2771
2772SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2773 SelectionDAG &DAG) const {
2774 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2775 MVT VT = Op.getSimpleValueType();
2776 SDLoc DL(Op);
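  // bf16 is simply the high 16 bits of an f32 with the same value, so shifting
  // the 16-bit payload left by 16 and reinterpreting the result as f32 performs
  // the extension exactly; e.g. bf16 0x3f80 (1.0) becomes f32 0x3f800000 (1.0).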
2777 Op = DAG.getNode(
2778 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2779 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2780 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2781 DL, MVT::f32, Op)
2782 : DAG.getBitcast(MVT::f32, Op);
2783 if (VT != MVT::f32)
2784 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2785 return Res;
2786}
2787
2788// Lower BUILD_VECTOR as broadcast load (if possible).
2789// For example:
2790// %a = load i8, ptr %ptr
2791// %b = build_vector %a, %a, %a, %a
2792// is lowered to :
2793// (VLDREPL_B $a0, 0)
2794 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2795 const SDLoc &DL,
2796 SelectionDAG &DAG) {
2797 MVT VT = BVOp->getSimpleValueType(0);
2798 int NumOps = BVOp->getNumOperands();
2799
2800 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2801 "Unsupported vector type for broadcast.");
2802
2803 SDValue IdentitySrc;
2804 bool IsIdentity = true;
2805
2806 for (int i = 0; i != NumOps; i++) {
2807 SDValue Op = BVOp->getOperand(i);
2808 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2809 IsIdentity = false;
2810 break;
2811 }
2812 IdentitySrc = BVOp->getOperand(0);
2813 }
2814
2815 // Make sure that this load is valid and only has one user.
2816 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2817 return SDValue();
2818
2819 auto *LN = cast<LoadSDNode>(IdentitySrc);
2820 auto ExtType = LN->getExtensionType();
2821
2822 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2823 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2824 SDVTList Tys =
2825 LN->isIndexed()
2826 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2827 : DAG.getVTList(VT, MVT::Other);
2828 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2829 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2830 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2831 return BCast;
2832 }
2833 return SDValue();
2834}
2835
2836// Sequentially insert elements from Ops into Vector, from low to high indices.
2837// Note: Ops can have fewer elements than Vector.
2838 static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
2839 const LoongArchSubtarget &Subtarget, SDValue &Vector,
2840 EVT ResTy) {
2841 assert(Ops.size() <= ResTy.getVectorNumElements());
2842
2843 SDValue Op0 = Ops[0];
2844 if (!Op0.isUndef())
2845 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2846 for (unsigned i = 1; i < Ops.size(); ++i) {
2847 SDValue Opi = Ops[i];
2848 if (Opi.isUndef())
2849 continue;
2850 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2851 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2852 }
2853}
2854
2855// Build a ResTy subvector from Node, taking NumElts elements starting at index
2856// 'first'.
2857 static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
2858 SelectionDAG &DAG, SDLoc DL,
2859 const LoongArchSubtarget &Subtarget,
2860 EVT ResTy, unsigned first) {
2861 unsigned NumElts = ResTy.getVectorNumElements();
2862
2863 assert(first >= 0 &&
2864 first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
2865
2866 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
2867 Node->op_begin() + first + NumElts);
2868 SDValue Vector = DAG.getUNDEF(ResTy);
2869 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
2870 return Vector;
2871}
2872
2873SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2874 SelectionDAG &DAG) const {
2875 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2876 MVT VT = Node->getSimpleValueType(0);
2877 EVT ResTy = Op->getValueType(0);
2878 unsigned NumElts = ResTy.getVectorNumElements();
2879 SDLoc DL(Op);
2880 APInt SplatValue, SplatUndef;
2881 unsigned SplatBitSize;
2882 bool HasAnyUndefs;
2883 bool IsConstant = false;
2884 bool UseSameConstant = true;
2885 SDValue ConstantValue;
2886 bool Is128Vec = ResTy.is128BitVector();
2887 bool Is256Vec = ResTy.is256BitVector();
2888
2889 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2890 (!Subtarget.hasExtLASX() || !Is256Vec))
2891 return SDValue();
2892
2893 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2894 return Result;
2895
2896 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2897 /*MinSplatBits=*/8) &&
2898 SplatBitSize <= 64) {
2899 // We can only cope with 8, 16, 32, or 64-bit elements.
2900 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2901 SplatBitSize != 64)
2902 return SDValue();
2903
2904 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2905 // We can only handle 64-bit elements that are within
2906 // the signed 10-bit range or match vldi patterns on 32-bit targets.
2907 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2908 if (!SplatValue.isSignedIntN(10) &&
2909 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
2910 return SDValue();
2911 if ((Is128Vec && ResTy == MVT::v4i32) ||
2912 (Is256Vec && ResTy == MVT::v8i32))
2913 return Op;
2914 }
2915
2916 EVT ViaVecTy;
2917
2918 switch (SplatBitSize) {
2919 default:
2920 return SDValue();
2921 case 8:
2922 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2923 break;
2924 case 16:
2925 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2926 break;
2927 case 32:
2928 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2929 break;
2930 case 64:
2931 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2932 break;
2933 }
2934
2935 // SelectionDAG::getConstant will promote SplatValue appropriately.
2936 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2937
2938 // Bitcast to the type we originally wanted.
2939 if (ViaVecTy != ResTy)
2940 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2941
2942 return Result;
2943 }
2944
2945 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2946 return Op;
2947
2948 for (unsigned i = 0; i < NumElts; ++i) {
2949 SDValue Opi = Node->getOperand(i);
2950 if (isIntOrFPConstant(Opi)) {
2951 IsConstant = true;
2952 if (!ConstantValue.getNode())
2953 ConstantValue = Opi;
2954 else if (ConstantValue != Opi)
2955 UseSameConstant = false;
2956 }
2957 }
2958
2959 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2960 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2961 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2962 for (unsigned i = 0; i < NumElts; ++i) {
2963 SDValue Opi = Node->getOperand(i);
2964 if (!isIntOrFPConstant(Opi))
2965 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2966 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2967 }
2968 return Result;
2969 }
2970
2971 if (!IsConstant) {
2972 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
2973 // the sub-sequence of the vector and then broadcast the sub-sequence.
2974 //
2975 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
2976 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
2977 // generates worse code in some cases. This could be further optimized
2978 // with more consideration.
2979 SmallVector<SDValue> Sequence;
2980 BitVector UndefElements;
2981 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
2982 UndefElements.count() == 0) {
2983 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
2984 // because the high part can simply be treated as undef.
2985 SDValue Vector = DAG.getUNDEF(ResTy);
2986 EVT FillTy = Is256Vec
2987 ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
2988 : ResTy;
2989 SDValue FillVec =
2990 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
2991
2992 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
2993
2994 unsigned SeqLen = Sequence.size();
2995 unsigned SplatLen = NumElts / SeqLen;
2996 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
2997 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
2998
2999 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
3000 // vector to v4i64 in order to match the pattern of XVREPLVE0Q.
3001 if (SplatEltTy == MVT::i128)
3002 SplatTy = MVT::v4i64;
3003
3004 SDValue SplatVec;
3005 SDValue SrcVec = DAG.getBitcast(
3006 SplatTy,
3007 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3008 if (Is256Vec) {
3009 SplatVec =
3010 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3011 : LoongArchISD::XVREPLVE0,
3012 DL, SplatTy, SrcVec);
3013 } else {
3014 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3015 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3016 }
3017
3018 return DAG.getBitcast(ResTy, SplatVec);
3019 }
3020
3021 // Use INSERT_VECTOR_ELT operations rather than expanding to stores, because
3022 // the cost of going through memory is much higher.
3023 //
3024 // For 256-bit vectors, normally split into two halves and concatenate.
3025 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3026 // one non-undef element, skip splitting to avoid a worse result.
3027 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3028 ResTy == MVT::v4f64) {
3029 unsigned NonUndefCount = 0;
3030 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3031 if (!Node->getOperand(i).isUndef()) {
3032 ++NonUndefCount;
3033 if (NonUndefCount > 1)
3034 break;
3035 }
3036 }
3037 if (NonUndefCount == 1)
3038 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3039 }
3040
3041 EVT VecTy =
3042 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3043 SDValue Vector =
3044 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3045
3046 if (Is128Vec)
3047 return Vector;
3048
3049 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3050 VecTy, NumElts / 2);
3051
3052 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3053 }
3054
3055 return SDValue();
3056}
3057
3058SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3059 SelectionDAG &DAG) const {
3060 SDLoc DL(Op);
3061 MVT ResVT = Op.getSimpleValueType();
3062 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3063
3064 unsigned NumOperands = Op.getNumOperands();
3065 unsigned NumFreezeUndef = 0;
3066 unsigned NumZero = 0;
3067 unsigned NumNonZero = 0;
3068 unsigned NonZeros = 0;
3069 SmallSet<SDValue, 4> Undefs;
3070 for (unsigned i = 0; i != NumOperands; ++i) {
3071 SDValue SubVec = Op.getOperand(i);
3072 if (SubVec.isUndef())
3073 continue;
3074 if (ISD::isFreezeUndef(SubVec.getNode())) {
3075 // If the freeze(undef) has multiple uses then we must fold to zero.
3076 if (SubVec.hasOneUse()) {
3077 ++NumFreezeUndef;
3078 } else {
3079 ++NumZero;
3080 Undefs.insert(SubVec);
3081 }
3082 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3083 ++NumZero;
3084 else {
3085 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3086 NonZeros |= 1 << i;
3087 ++NumNonZero;
3088 }
3089 }
3090
3091 // If we have more than 2 non-zeros, build each half separately.
3092 if (NumNonZero > 2) {
3093 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3094 ArrayRef<SDUse> Ops = Op->ops();
3095 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3096 Ops.slice(0, NumOperands / 2));
3097 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3098 Ops.slice(NumOperands / 2));
3099 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3100 }
3101
3102 // Otherwise, build it up through insert_subvectors.
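  // For example, concatenating a non-zero v4i32 with an all-zeros v4i32 gives
  // NumZero = 1 and NonZeros = 0b01, so we start from a zero v8i32 and only
  // insert the non-zero half at element index 0.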
3103 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3104 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3105 : DAG.getUNDEF(ResVT));
3106
3107 // Replace Undef operands with ZeroVector.
3108 for (SDValue U : Undefs)
3109 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3110
3111 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3112 unsigned NumSubElems = SubVT.getVectorNumElements();
3113 for (unsigned i = 0; i != NumOperands; ++i) {
3114 if ((NonZeros & (1 << i)) == 0)
3115 continue;
3116
3117 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3118 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3119 }
3120
3121 return Vec;
3122}
3123
3124SDValue
3125LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3126 SelectionDAG &DAG) const {
3127 MVT EltVT = Op.getSimpleValueType();
3128 SDValue Vec = Op->getOperand(0);
3129 EVT VecTy = Vec->getValueType(0);
3130 SDValue Idx = Op->getOperand(1);
3131 SDLoc DL(Op);
3132 MVT GRLenVT = Subtarget.getGRLenVT();
3133
3134 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3135
3136 if (isa<ConstantSDNode>(Idx))
3137 return Op;
3138
3139 switch (VecTy.getSimpleVT().SimpleTy) {
3140 default:
3141 llvm_unreachable("Unexpected type");
3142 case MVT::v32i8:
3143 case MVT::v16i16:
3144 case MVT::v4i64:
3145 case MVT::v4f64: {
3146 // Extract the high half subvector and place it in the low half of a new
3147 // vector. It doesn't matter what the high half of the new vector is.
3148 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3149 SDValue VecHi =
3150 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3151 SDValue TmpVec =
3152 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3153 VecHi, DAG.getConstant(0, DL, GRLenVT));
3154
3155 // Shuffle the original Vec and TmpVec using MaskVec; the lowest element
3156 // of MaskVec is Idx and the rest do not matter. ResVec[0] will hold the
3157 // desired element.
3158 SDValue IdxCp =
3159 Subtarget.is64Bit()
3160 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3161 : DAG.getBitcast(MVT::f32, Idx);
3162 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3163 SDValue MaskVec =
3164 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3165 SDValue ResVec =
3166 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3167
3168 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3169 DAG.getConstant(0, DL, GRLenVT));
3170 }
3171 case MVT::v8i32:
3172 case MVT::v8f32: {
3173 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3174 SDValue SplatValue =
3175 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3176
3177 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3178 DAG.getConstant(0, DL, GRLenVT));
3179 }
3180 }
3181}
3182
3183SDValue
3184LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3185 SelectionDAG &DAG) const {
3186 MVT VT = Op.getSimpleValueType();
3187 MVT EltVT = VT.getVectorElementType();
3188 unsigned NumElts = VT.getVectorNumElements();
3189 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3190 SDLoc DL(Op);
3191 SDValue Op0 = Op.getOperand(0);
3192 SDValue Op1 = Op.getOperand(1);
3193 SDValue Op2 = Op.getOperand(2);
3194
3195 if (isa<ConstantSDNode>(Op2))
3196 return Op;
3197
3198 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3199 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3200
3201 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3202 return SDValue();
3203
3204 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3205 SmallVector<SDValue, 32> RawIndices;
3206 SDValue SplatIdx;
3207 SDValue Indices;
3208
3209 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3210 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3211 for (unsigned i = 0; i < NumElts; ++i) {
3212 RawIndices.push_back(Op2);
3213 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3214 }
3215 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3216 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3217
3218 RawIndices.clear();
3219 for (unsigned i = 0; i < NumElts; ++i) {
3220 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3221 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3222 }
3223 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3224 Indices = DAG.getBitcast(IdxVTy, Indices);
3225 } else {
3226 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3227
3228 for (unsigned i = 0; i < NumElts; ++i)
3229 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3230 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3231 }
3232
3233 // insert vec, elt, idx
3234 // =>
3235 // select (splatidx == {0,1,2...}) ? splatelt : vec
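  // For example, inserting into a v4i32 at a runtime index of 2 gives
  // SplatIdx = <2, 2, 2, 2> and Indices = <0, 1, 2, 3>, so SelectCC is
  // <0, 0, -1, 0> and the VSELECT below takes the new element only in lane 2.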
3236 SDValue SelectCC =
3237 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3238 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3239}
3240
3241SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3242 SelectionDAG &DAG) const {
3243 SDLoc DL(Op);
3244 SyncScope::ID FenceSSID =
3245 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3246
3247 // singlethread fences only synchronize with signal handlers on the same
3248 // thread and thus only need to preserve instruction order, not actually
3249 // enforce memory ordering.
3250 if (FenceSSID == SyncScope::SingleThread)
3251 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3252 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3253
3254 return Op;
3255}
3256
3257SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3258 SelectionDAG &DAG) const {
3259
3260 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3261 DAG.getContext()->emitError(
3262 "On LA64, only 64-bit registers can be written.");
3263 return Op.getOperand(0);
3264 }
3265
3266 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3267 DAG.getContext()->emitError(
3268 "On LA32, only 32-bit registers can be written.");
3269 return Op.getOperand(0);
3270 }
3271
3272 return Op;
3273}
3274
3275SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3276 SelectionDAG &DAG) const {
3277 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3278 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3279 "be a constant integer");
3280 return SDValue();
3281 }
3282
3283 MachineFunction &MF = DAG.getMachineFunction();
3284 MF.getFrameInfo().setFrameAddressIsTaken(true);
3285 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3286 EVT VT = Op.getValueType();
3287 SDLoc DL(Op);
3288 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3289 unsigned Depth = Op.getConstantOperandVal(0);
3290 int GRLenInBytes = Subtarget.getGRLen() / 8;
3291
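  // Each iteration loads the caller's frame pointer, which the standard
  // prologue saves at [fp - 2 * GRLenInBytes] (e.g. fp - 16 on LA64), walking
  // up one frame per level of Depth.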
3292 while (Depth--) {
3293 int Offset = -(GRLenInBytes * 2);
3294 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3295 DAG.getSignedConstant(Offset, DL, VT));
3296 FrameAddr =
3297 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3298 }
3299 return FrameAddr;
3300}
3301
3302SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3303 SelectionDAG &DAG) const {
3304 // Currently we only support lowering the return address for the current frame.
3305 if (Op.getConstantOperandVal(0) != 0) {
3306 DAG.getContext()->emitError(
3307 "return address can only be determined for the current frame");
3308 return SDValue();
3309 }
3310
3311 MachineFunction &MF = DAG.getMachineFunction();
3312 MF.getFrameInfo().setReturnAddressIsTaken(true);
3313 MVT GRLenVT = Subtarget.getGRLenVT();
3314
3315 // Return the value of the return address register, marking it an implicit
3316 // live-in.
3317 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3318 getRegClassFor(GRLenVT));
3319 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3320}
3321
3322SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3323 SelectionDAG &DAG) const {
3324 MachineFunction &MF = DAG.getMachineFunction();
3325 auto Size = Subtarget.getGRLen() / 8;
3326 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3327 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3328}
3329
3330SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3331 SelectionDAG &DAG) const {
3332 MachineFunction &MF = DAG.getMachineFunction();
3333 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3334
3335 SDLoc DL(Op);
3336 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3337 getPointerTy(MF.getDataLayout()));
3338
3339 // vastart just stores the address of the VarArgsFrameIndex slot into the
3340 // memory location argument.
3341 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3342 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3343 MachinePointerInfo(SV));
3344}
3345
3346SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3347 SelectionDAG &DAG) const {
3348 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3349 !Subtarget.hasBasicD() && "unexpected target features");
3350
3351 SDLoc DL(Op);
3352 SDValue Op0 = Op.getOperand(0);
3353 if (Op0->getOpcode() == ISD::AND) {
3354 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3355 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3356 return Op;
3357 }
3358
3359 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3360 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3361 Op0.getConstantOperandVal(2) == UINT64_C(0))
3362 return Op;
3363
3364 if (Op0.getOpcode() == ISD::AssertZext &&
3365 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3366 return Op;
3367
3368 EVT OpVT = Op0.getValueType();
3369 EVT RetVT = Op.getValueType();
3370 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3371 MakeLibCallOptions CallOptions;
3372 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3373 SDValue Chain = SDValue();
3374 SDValue Result;
3375 std::tie(Result, Chain) =
3376 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3377 return Result;
3378}
3379
3380SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3381 SelectionDAG &DAG) const {
3382 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3383 !Subtarget.hasBasicD() && "unexpected target features");
3384
3385 SDLoc DL(Op);
3386 SDValue Op0 = Op.getOperand(0);
3387
3388 if ((Op0.getOpcode() == ISD::AssertSext ||
3389 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
3390 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3391 return Op;
3392
3393 EVT OpVT = Op0.getValueType();
3394 EVT RetVT = Op.getValueType();
3395 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3396 MakeLibCallOptions CallOptions;
3397 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3398 SDValue Chain = SDValue();
3399 SDValue Result;
3400 std::tie(Result, Chain) =
3401 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3402 return Result;
3403}
3404
3405SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3406 SelectionDAG &DAG) const {
3407
3408 SDLoc DL(Op);
3409 EVT VT = Op.getValueType();
3410 SDValue Op0 = Op.getOperand(0);
3411 EVT Op0VT = Op0.getValueType();
3412
3413 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3414 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3415 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3416 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3417 }
3418 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3419 SDValue Lo, Hi;
3420 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3421 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3422 }
3423 return Op;
3424}
3425
3426SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3427 SelectionDAG &DAG) const {
3428
3429 SDLoc DL(Op);
3430 SDValue Op0 = Op.getOperand(0);
3431
3432 if (Op0.getValueType() == MVT::f16)
3433 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3434
3435 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3436 !Subtarget.hasBasicD()) {
3437 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3438 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3439 }
3440
3441 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3442 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3443 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3444}
3445
3446 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3447 SelectionDAG &DAG, unsigned Flags) {
3448 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3449}
3450
3451 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3452 SelectionDAG &DAG, unsigned Flags) {
3453 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3454 Flags);
3455}
3456
3457 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3458 SelectionDAG &DAG, unsigned Flags) {
3459 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3460 N->getOffset(), Flags);
3461}
3462
3463 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3464 SelectionDAG &DAG, unsigned Flags) {
3465 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3466}
3467
3468template <class NodeTy>
3469SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3470 CodeModel::Model M,
3471 bool IsLocal) const {
3472 SDLoc DL(N);
3473 EVT Ty = getPointerTy(DAG.getDataLayout());
3474 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3475 SDValue Load;
3476
3477 switch (M) {
3478 default:
3479 report_fatal_error("Unsupported code model");
3480
3481 case CodeModel::Large: {
3482 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3483
3484 // This is not actually used, but is necessary for successfully matching
3485 // the PseudoLA_*_LARGE nodes.
3486 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3487 if (IsLocal) {
3488 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3489 // eventually becomes the desired 5-insn code sequence.
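 // Illustrative sketch (not from the upstream source): the 5-insn sequence
 // referred to here is presumably the large-code-model PC-relative form,
 // roughly
 //   pcalau12i $dst, %pc_hi20(sym)
 //   addi.d    $tmp, $zero, %pc_lo12(sym)
 //   lu32i.d   $tmp, %pc64_lo20(sym)
 //   lu52i.d   $tmp, $tmp, %pc64_hi12(sym)
 //   add.d     $dst, $dst, $tmp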
3490 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3491 Tmp, Addr),
3492 0);
3493 } else {
3494 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3495 // eventually becomes the desired 5-insn code sequence.
3496 Load = SDValue(
3497 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3498 0);
3499 }
3500 break;
3501 }
3502
3503 case CodeModel::Small:
3504 case CodeModel::Medium:
3505 if (IsLocal) {
3506 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3507 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3508 Load = SDValue(
3509 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3510 } else {
3511 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3512 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3513 Load =
3514 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3515 }
3516 }
3517
3518 if (!IsLocal) {
3519 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3520 MachineFunction &MF = DAG.getMachineFunction();
3521 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3522 MachinePointerInfo::getGOT(MF),
3523 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3524 MachineMemOperand::MOInvariant,
3525 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3526 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3527 }
3528
3529 return Load;
3530}
3531
3532SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3533 SelectionDAG &DAG) const {
3534 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3535 DAG.getTarget().getCodeModel());
3536}
3537
3538SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3539 SelectionDAG &DAG) const {
3540 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3541 DAG.getTarget().getCodeModel());
3542}
3543
3544SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3545 SelectionDAG &DAG) const {
3546 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3547 DAG.getTarget().getCodeModel());
3548}
3549
3550SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3551 SelectionDAG &DAG) const {
3552 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3553 assert(N->getOffset() == 0 && "unexpected offset in global node");
3554 auto CM = DAG.getTarget().getCodeModel();
3555 const GlobalValue *GV = N->getGlobal();
3556
3557 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3558 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3559 CM = *GCM;
3560 }
3561
3562 return getAddr(N, DAG, CM, GV->isDSOLocal());
3563}
3564
3565SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3566 SelectionDAG &DAG,
3567 unsigned Opc, bool UseGOT,
3568 bool Large) const {
3569 SDLoc DL(N);
3570 EVT Ty = getPointerTy(DAG.getDataLayout());
3571 MVT GRLenVT = Subtarget.getGRLenVT();
3572
3573 // This is not actually used, but is necessary for successfully matching the
3574 // PseudoLA_*_LARGE nodes.
3575 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3576 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3577
3578 // Only IE needs an extra argument for large code model.
3579 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3580 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3581 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3582
3583 // If it is LE for normal/medium code model, the add tp operation will occur
3584 // during the pseudo-instruction expansion.
3585 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3586 return Offset;
3587
3588 if (UseGOT) {
3589 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3590 MachineFunction &MF = DAG.getMachineFunction();
3591 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3592 MachinePointerInfo::getGOT(MF),
3593 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3594 MachineMemOperand::MOInvariant,
3595 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3596 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3597 }
3598
3599 // Add the thread pointer.
3600 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3601 DAG.getRegister(LoongArch::R2, GRLenVT));
3602}
3603
3604SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3605 SelectionDAG &DAG,
3606 unsigned Opc,
3607 bool Large) const {
3608 SDLoc DL(N);
3609 EVT Ty = getPointerTy(DAG.getDataLayout());
3610 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3611
3612 // This is not actually used, but is necessary for successfully matching the
3613 // PseudoLA_*_LARGE nodes.
3614 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3615
3616 // Use a PC-relative addressing mode to access the dynamic GOT address.
3617 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3618 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3619 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3620
3621 // Prepare argument list to generate call.
3622 TargetLowering::ArgListTy Args;
3623 Args.emplace_back(Load, CallTy);
3624
3625 // Setup call to __tls_get_addr.
3626 TargetLowering::CallLoweringInfo CLI(DAG);
3627 CLI.setDebugLoc(DL)
3628 .setChain(DAG.getEntryNode())
3629 .setLibCallee(CallingConv::C, CallTy,
3630 DAG.getExternalSymbol("__tls_get_addr", Ty),
3631 std::move(Args));
3632
3633 return LowerCallTo(CLI).first;
3634}
3635
3636SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3637 SelectionDAG &DAG, unsigned Opc,
3638 bool Large) const {
3639 SDLoc DL(N);
3640 EVT Ty = getPointerTy(DAG.getDataLayout());
3641 const GlobalValue *GV = N->getGlobal();
3642
3643 // This is not actually used, but is necessary for successfully matching the
3644 // PseudoLA_*_LARGE nodes.
3645 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3646
3647 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3648 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3649 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3650 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3651 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3652}
3653
3654SDValue
3655LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3656 SelectionDAG &DAG) const {
3657 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3658 CallingConv::GHC)
3659 report_fatal_error("In GHC calling convention TLS is not supported");
3660
3661 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3662 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3663
3664 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3665 assert(N->getOffset() == 0 && "unexpected offset in global node");
3666
3667 if (DAG.getTarget().useEmulatedTLS())
3668 reportFatalUsageError("the emulated TLS is prohibited");
3669
3670 bool IsDesc = DAG.getTarget().useTLSDESC();
3671
3672 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3673 case TLSModel::GeneralDynamic:
3674 // In this model, application code calls the dynamic linker function
3675 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3676 // runtime.
3677 if (!IsDesc)
3678 return getDynamicTLSAddr(N, DAG,
3679 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3680 : LoongArch::PseudoLA_TLS_GD,
3681 Large);
3682 break;
3683 case TLSModel::LocalDynamic:
3684 // Same as GeneralDynamic, except for assembly modifiers and relocation
3685 // records.
3686 if (!IsDesc)
3687 return getDynamicTLSAddr(N, DAG,
3688 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3689 : LoongArch::PseudoLA_TLS_LD,
3690 Large);
3691 break;
3692 case TLSModel::InitialExec:
3693 // This model uses the GOT to resolve TLS offsets.
3694 return getStaticTLSAddr(N, DAG,
3695 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3696 : LoongArch::PseudoLA_TLS_IE,
3697 /*UseGOT=*/true, Large);
3698 case TLSModel::LocalExec:
3699 // This model is used when static linking, as the TLS offsets are resolved
3700 // during program linking.
3701 //
3702 // This node doesn't need an extra argument for the large code model.
3703 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3704 /*UseGOT=*/false, Large);
3705 }
3706
3707 return getTLSDescAddr(N, DAG,
3708 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3709 : LoongArch::PseudoLA_TLS_DESC,
3710 Large);
3711}
3712
3713template <unsigned N>
3714static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3715 SelectionDAG &DAG, bool IsSigned = false) {
3716 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3717 // Check the ImmArg.
3718 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3719 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3720 DAG.getContext()->emitError(Op->getOperationName(0) +
3721 ": argument out of range.");
3722 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3723 }
3724 return SDValue();
3725}
3726
3727SDValue
3728LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3729 SelectionDAG &DAG) const {
3730 switch (Op.getConstantOperandVal(0)) {
3731 default:
3732 return SDValue(); // Don't custom lower most intrinsics.
3733 case Intrinsic::thread_pointer: {
3734 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3735 return DAG.getRegister(LoongArch::R2, PtrVT);
3736 }
3737 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3738 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3739 case Intrinsic::loongarch_lsx_vreplvei_d:
3740 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3741 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3742 case Intrinsic::loongarch_lsx_vreplvei_w:
3743 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3744 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3745 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3746 case Intrinsic::loongarch_lasx_xvpickve_d:
3747 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3748 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3749 case Intrinsic::loongarch_lasx_xvinsve0_d:
3750 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3751 case Intrinsic::loongarch_lsx_vsat_b:
3752 case Intrinsic::loongarch_lsx_vsat_bu:
3753 case Intrinsic::loongarch_lsx_vrotri_b:
3754 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3755 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3756 case Intrinsic::loongarch_lsx_vsrlri_b:
3757 case Intrinsic::loongarch_lsx_vsrari_b:
3758 case Intrinsic::loongarch_lsx_vreplvei_h:
3759 case Intrinsic::loongarch_lasx_xvsat_b:
3760 case Intrinsic::loongarch_lasx_xvsat_bu:
3761 case Intrinsic::loongarch_lasx_xvrotri_b:
3762 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3763 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3764 case Intrinsic::loongarch_lasx_xvsrlri_b:
3765 case Intrinsic::loongarch_lasx_xvsrari_b:
3766 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3767 case Intrinsic::loongarch_lasx_xvpickve_w:
3768 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3769 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3770 case Intrinsic::loongarch_lasx_xvinsve0_w:
3771 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3772 case Intrinsic::loongarch_lsx_vsat_h:
3773 case Intrinsic::loongarch_lsx_vsat_hu:
3774 case Intrinsic::loongarch_lsx_vrotri_h:
3775 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3776 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3777 case Intrinsic::loongarch_lsx_vsrlri_h:
3778 case Intrinsic::loongarch_lsx_vsrari_h:
3779 case Intrinsic::loongarch_lsx_vreplvei_b:
3780 case Intrinsic::loongarch_lasx_xvsat_h:
3781 case Intrinsic::loongarch_lasx_xvsat_hu:
3782 case Intrinsic::loongarch_lasx_xvrotri_h:
3783 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3784 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3785 case Intrinsic::loongarch_lasx_xvsrlri_h:
3786 case Intrinsic::loongarch_lasx_xvsrari_h:
3787 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3788 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3789 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3790 case Intrinsic::loongarch_lsx_vsrani_b_h:
3791 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3792 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3793 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3794 case Intrinsic::loongarch_lsx_vssrani_b_h:
3795 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3796 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3797 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3798 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3799 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3800 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3801 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3802 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3803 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3804 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3805 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3806 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3807 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3808 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3809 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3810 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3811 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3812 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3813 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3814 case Intrinsic::loongarch_lsx_vsat_w:
3815 case Intrinsic::loongarch_lsx_vsat_wu:
3816 case Intrinsic::loongarch_lsx_vrotri_w:
3817 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3818 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3819 case Intrinsic::loongarch_lsx_vsrlri_w:
3820 case Intrinsic::loongarch_lsx_vsrari_w:
3821 case Intrinsic::loongarch_lsx_vslei_bu:
3822 case Intrinsic::loongarch_lsx_vslei_hu:
3823 case Intrinsic::loongarch_lsx_vslei_wu:
3824 case Intrinsic::loongarch_lsx_vslei_du:
3825 case Intrinsic::loongarch_lsx_vslti_bu:
3826 case Intrinsic::loongarch_lsx_vslti_hu:
3827 case Intrinsic::loongarch_lsx_vslti_wu:
3828 case Intrinsic::loongarch_lsx_vslti_du:
3829 case Intrinsic::loongarch_lsx_vbsll_v:
3830 case Intrinsic::loongarch_lsx_vbsrl_v:
3831 case Intrinsic::loongarch_lasx_xvsat_w:
3832 case Intrinsic::loongarch_lasx_xvsat_wu:
3833 case Intrinsic::loongarch_lasx_xvrotri_w:
3834 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3835 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3836 case Intrinsic::loongarch_lasx_xvsrlri_w:
3837 case Intrinsic::loongarch_lasx_xvsrari_w:
3838 case Intrinsic::loongarch_lasx_xvslei_bu:
3839 case Intrinsic::loongarch_lasx_xvslei_hu:
3840 case Intrinsic::loongarch_lasx_xvslei_wu:
3841 case Intrinsic::loongarch_lasx_xvslei_du:
3842 case Intrinsic::loongarch_lasx_xvslti_bu:
3843 case Intrinsic::loongarch_lasx_xvslti_hu:
3844 case Intrinsic::loongarch_lasx_xvslti_wu:
3845 case Intrinsic::loongarch_lasx_xvslti_du:
3846 case Intrinsic::loongarch_lasx_xvbsll_v:
3847 case Intrinsic::loongarch_lasx_xvbsrl_v:
3848 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3849 case Intrinsic::loongarch_lsx_vseqi_b:
3850 case Intrinsic::loongarch_lsx_vseqi_h:
3851 case Intrinsic::loongarch_lsx_vseqi_w:
3852 case Intrinsic::loongarch_lsx_vseqi_d:
3853 case Intrinsic::loongarch_lsx_vslei_b:
3854 case Intrinsic::loongarch_lsx_vslei_h:
3855 case Intrinsic::loongarch_lsx_vslei_w:
3856 case Intrinsic::loongarch_lsx_vslei_d:
3857 case Intrinsic::loongarch_lsx_vslti_b:
3858 case Intrinsic::loongarch_lsx_vslti_h:
3859 case Intrinsic::loongarch_lsx_vslti_w:
3860 case Intrinsic::loongarch_lsx_vslti_d:
3861 case Intrinsic::loongarch_lasx_xvseqi_b:
3862 case Intrinsic::loongarch_lasx_xvseqi_h:
3863 case Intrinsic::loongarch_lasx_xvseqi_w:
3864 case Intrinsic::loongarch_lasx_xvseqi_d:
3865 case Intrinsic::loongarch_lasx_xvslei_b:
3866 case Intrinsic::loongarch_lasx_xvslei_h:
3867 case Intrinsic::loongarch_lasx_xvslei_w:
3868 case Intrinsic::loongarch_lasx_xvslei_d:
3869 case Intrinsic::loongarch_lasx_xvslti_b:
3870 case Intrinsic::loongarch_lasx_xvslti_h:
3871 case Intrinsic::loongarch_lasx_xvslti_w:
3872 case Intrinsic::loongarch_lasx_xvslti_d:
3873 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3874 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3875 case Intrinsic::loongarch_lsx_vsrani_h_w:
3876 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3877 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3878 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3879 case Intrinsic::loongarch_lsx_vssrani_h_w:
3880 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3881 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3882 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3883 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3884 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3885 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3886 case Intrinsic::loongarch_lsx_vfrstpi_b:
3887 case Intrinsic::loongarch_lsx_vfrstpi_h:
3888 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3889 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3890 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3891 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3892 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3893 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3894 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3895 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3896 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3897 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3898 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3899 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3900 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3901 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3902 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3903 case Intrinsic::loongarch_lsx_vsat_d:
3904 case Intrinsic::loongarch_lsx_vsat_du:
3905 case Intrinsic::loongarch_lsx_vrotri_d:
3906 case Intrinsic::loongarch_lsx_vsrlri_d:
3907 case Intrinsic::loongarch_lsx_vsrari_d:
3908 case Intrinsic::loongarch_lasx_xvsat_d:
3909 case Intrinsic::loongarch_lasx_xvsat_du:
3910 case Intrinsic::loongarch_lasx_xvrotri_d:
3911 case Intrinsic::loongarch_lasx_xvsrlri_d:
3912 case Intrinsic::loongarch_lasx_xvsrari_d:
3913 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3914 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3915 case Intrinsic::loongarch_lsx_vsrani_w_d:
3916 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3917 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3918 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3919 case Intrinsic::loongarch_lsx_vssrani_w_d:
3920 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3921 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3922 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3923 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3924 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3925 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3926 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3927 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3928 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3929 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3930 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3931 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3932 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3933 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3934 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3935 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3936 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3937 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3938 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3939 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3940 case Intrinsic::loongarch_lsx_vsrani_d_q:
3941 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3942 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3943 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3944 case Intrinsic::loongarch_lsx_vssrani_d_q:
3945 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3946 case Intrinsic::loongarch_lsx_vssrani_du_q:
3947 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3948 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3949 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3950 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3951 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3952 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3953 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3954 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3955 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3956 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3957 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3958 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3959 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3960 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3961 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3962 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3963 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3964 case Intrinsic::loongarch_lsx_vnori_b:
3965 case Intrinsic::loongarch_lsx_vshuf4i_b:
3966 case Intrinsic::loongarch_lsx_vshuf4i_h:
3967 case Intrinsic::loongarch_lsx_vshuf4i_w:
3968 case Intrinsic::loongarch_lasx_xvnori_b:
3969 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3970 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3971 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3972 case Intrinsic::loongarch_lasx_xvpermi_d:
3973 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3974 case Intrinsic::loongarch_lsx_vshuf4i_d:
3975 case Intrinsic::loongarch_lsx_vpermi_w:
3976 case Intrinsic::loongarch_lsx_vbitseli_b:
3977 case Intrinsic::loongarch_lsx_vextrins_b:
3978 case Intrinsic::loongarch_lsx_vextrins_h:
3979 case Intrinsic::loongarch_lsx_vextrins_w:
3980 case Intrinsic::loongarch_lsx_vextrins_d:
3981 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3982 case Intrinsic::loongarch_lasx_xvpermi_w:
3983 case Intrinsic::loongarch_lasx_xvpermi_q:
3984 case Intrinsic::loongarch_lasx_xvbitseli_b:
3985 case Intrinsic::loongarch_lasx_xvextrins_b:
3986 case Intrinsic::loongarch_lasx_xvextrins_h:
3987 case Intrinsic::loongarch_lasx_xvextrins_w:
3988 case Intrinsic::loongarch_lasx_xvextrins_d:
3989 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3990 case Intrinsic::loongarch_lsx_vrepli_b:
3991 case Intrinsic::loongarch_lsx_vrepli_h:
3992 case Intrinsic::loongarch_lsx_vrepli_w:
3993 case Intrinsic::loongarch_lsx_vrepli_d:
3994 case Intrinsic::loongarch_lasx_xvrepli_b:
3995 case Intrinsic::loongarch_lasx_xvrepli_h:
3996 case Intrinsic::loongarch_lasx_xvrepli_w:
3997 case Intrinsic::loongarch_lasx_xvrepli_d:
3998 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3999 case Intrinsic::loongarch_lsx_vldi:
4000 case Intrinsic::loongarch_lasx_xvldi:
4001 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4002 }
4003}
4004
4005 // Helper function that emits an error message for intrinsics with chain and
4006 // returns the merge values of an UNDEF and the chain.
4007static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4008 StringRef ErrorMsg,
4009 SelectionDAG &DAG) {
4010 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4011 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4012 SDLoc(Op));
4013}
4014
4015SDValue
4016LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4017 SelectionDAG &DAG) const {
4018 SDLoc DL(Op);
4019 MVT GRLenVT = Subtarget.getGRLenVT();
4020 EVT VT = Op.getValueType();
4021 SDValue Chain = Op.getOperand(0);
4022 const StringRef ErrorMsgOOR = "argument out of range";
4023 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4024 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4025
4026 switch (Op.getConstantOperandVal(1)) {
4027 default:
4028 return Op;
4029 case Intrinsic::loongarch_crc_w_b_w:
4030 case Intrinsic::loongarch_crc_w_h_w:
4031 case Intrinsic::loongarch_crc_w_w_w:
4032 case Intrinsic::loongarch_crc_w_d_w:
4033 case Intrinsic::loongarch_crcc_w_b_w:
4034 case Intrinsic::loongarch_crcc_w_h_w:
4035 case Intrinsic::loongarch_crcc_w_w_w:
4036 case Intrinsic::loongarch_crcc_w_d_w:
4037 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4038 case Intrinsic::loongarch_csrrd_w:
4039 case Intrinsic::loongarch_csrrd_d: {
4040 unsigned Imm = Op.getConstantOperandVal(2);
4041 return !isUInt<14>(Imm)
4042 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4043 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4044 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4045 }
4046 case Intrinsic::loongarch_csrwr_w:
4047 case Intrinsic::loongarch_csrwr_d: {
4048 unsigned Imm = Op.getConstantOperandVal(3);
4049 return !isUInt<14>(Imm)
4050 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4051 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4052 {Chain, Op.getOperand(2),
4053 DAG.getConstant(Imm, DL, GRLenVT)});
4054 }
4055 case Intrinsic::loongarch_csrxchg_w:
4056 case Intrinsic::loongarch_csrxchg_d: {
4057 unsigned Imm = Op.getConstantOperandVal(4);
4058 return !isUInt<14>(Imm)
4059 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4060 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4061 {Chain, Op.getOperand(2), Op.getOperand(3),
4062 DAG.getConstant(Imm, DL, GRLenVT)});
4063 }
4064 case Intrinsic::loongarch_iocsrrd_d: {
4065 return DAG.getNode(
4066 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4067 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4068 }
4069#define IOCSRRD_CASE(NAME, NODE) \
4070 case Intrinsic::loongarch_##NAME: { \
4071 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4072 {Chain, Op.getOperand(2)}); \
4073 }
4074 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4075 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4076 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4077#undef IOCSRRD_CASE
4078 case Intrinsic::loongarch_cpucfg: {
4079 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4080 {Chain, Op.getOperand(2)});
4081 }
4082 case Intrinsic::loongarch_lddir_d: {
4083 unsigned Imm = Op.getConstantOperandVal(3);
4084 return !isUInt<8>(Imm)
4085 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4086 : Op;
4087 }
4088 case Intrinsic::loongarch_movfcsr2gr: {
4089 if (!Subtarget.hasBasicF())
4090 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4091 unsigned Imm = Op.getConstantOperandVal(2);
4092 return !isUInt<2>(Imm)
4093 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4094 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4095 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4096 }
4097 case Intrinsic::loongarch_lsx_vld:
4098 case Intrinsic::loongarch_lsx_vldrepl_b:
4099 case Intrinsic::loongarch_lasx_xvld:
4100 case Intrinsic::loongarch_lasx_xvldrepl_b:
4101 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4102 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4103 : SDValue();
4104 case Intrinsic::loongarch_lsx_vldrepl_h:
4105 case Intrinsic::loongarch_lasx_xvldrepl_h:
4106 return !isShiftedInt<11, 1>(
4107 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4108 ? emitIntrinsicWithChainErrorMessage(
4109 Op, "argument out of range or not a multiple of 2", DAG)
4110 : SDValue();
4111 case Intrinsic::loongarch_lsx_vldrepl_w:
4112 case Intrinsic::loongarch_lasx_xvldrepl_w:
4113 return !isShiftedInt<10, 2>(
4114 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4115 ? emitIntrinsicWithChainErrorMessage(
4116 Op, "argument out of range or not a multiple of 4", DAG)
4117 : SDValue();
4118 case Intrinsic::loongarch_lsx_vldrepl_d:
4119 case Intrinsic::loongarch_lasx_xvldrepl_d:
4120 return !isShiftedInt<9, 3>(
4121 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4122 ? emitIntrinsicWithChainErrorMessage(
4123 Op, "argument out of range or not a multiple of 8", DAG)
4124 : SDValue();
4125 }
4126}
4127
4128 // Helper function that emits an error message for intrinsics with a void
4129 // return value and returns the chain.
4130static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4131 SelectionDAG &DAG) {
4132
4133 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4134 return Op.getOperand(0);
4135}
4136
4137SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4138 SelectionDAG &DAG) const {
4139 SDLoc DL(Op);
4140 MVT GRLenVT = Subtarget.getGRLenVT();
4141 SDValue Chain = Op.getOperand(0);
4142 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4143 SDValue Op2 = Op.getOperand(2);
4144 const StringRef ErrorMsgOOR = "argument out of range";
4145 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4146 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4147 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4148
4149 switch (IntrinsicEnum) {
4150 default:
4151 // TODO: Add more Intrinsics.
4152 return SDValue();
4153 case Intrinsic::loongarch_cacop_d:
4154 case Intrinsic::loongarch_cacop_w: {
4155 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4156 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4157 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4158 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4159 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4160 unsigned Imm1 = Op2->getAsZExtVal();
4161 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4162 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4163 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4164 return Op;
4165 }
4166 case Intrinsic::loongarch_dbar: {
4167 unsigned Imm = Op2->getAsZExtVal();
4168 return !isUInt<15>(Imm)
4169 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4170 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4171 DAG.getConstant(Imm, DL, GRLenVT));
4172 }
4173 case Intrinsic::loongarch_ibar: {
4174 unsigned Imm = Op2->getAsZExtVal();
4175 return !isUInt<15>(Imm)
4176 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4177 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4178 DAG.getConstant(Imm, DL, GRLenVT));
4179 }
4180 case Intrinsic::loongarch_break: {
4181 unsigned Imm = Op2->getAsZExtVal();
4182 return !isUInt<15>(Imm)
4183 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4184 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4185 DAG.getConstant(Imm, DL, GRLenVT));
4186 }
4187 case Intrinsic::loongarch_movgr2fcsr: {
4188 if (!Subtarget.hasBasicF())
4189 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4190 unsigned Imm = Op2->getAsZExtVal();
4191 return !isUInt<2>(Imm)
4192 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4193 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4194 DAG.getConstant(Imm, DL, GRLenVT),
4195 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4196 Op.getOperand(3)));
4197 }
4198 case Intrinsic::loongarch_syscall: {
4199 unsigned Imm = Op2->getAsZExtVal();
4200 return !isUInt<15>(Imm)
4201 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4202 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4203 DAG.getConstant(Imm, DL, GRLenVT));
4204 }
4205#define IOCSRWR_CASE(NAME, NODE) \
4206 case Intrinsic::loongarch_##NAME: { \
4207 SDValue Op3 = Op.getOperand(3); \
4208 return Subtarget.is64Bit() \
4209 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4210 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4211 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4212 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4213 Op3); \
4214 }
4215 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4216 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4217 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4218#undef IOCSRWR_CASE
4219 case Intrinsic::loongarch_iocsrwr_d: {
4220 return !Subtarget.is64Bit()
4221 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4222 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4223 Op2,
4224 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4225 Op.getOperand(3)));
4226 }
4227#define ASRT_LE_GT_CASE(NAME) \
4228 case Intrinsic::loongarch_##NAME: { \
4229 return !Subtarget.is64Bit() \
4230 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4231 : Op; \
4232 }
4233 ASRT_LE_GT_CASE(asrtle_d)
4234 ASRT_LE_GT_CASE(asrtgt_d)
4235#undef ASRT_LE_GT_CASE
4236 case Intrinsic::loongarch_ldpte_d: {
4237 unsigned Imm = Op.getConstantOperandVal(3);
4238 return !Subtarget.is64Bit()
4239 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4240 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4241 : Op;
4242 }
4243 case Intrinsic::loongarch_lsx_vst:
4244 case Intrinsic::loongarch_lasx_xvst:
4245 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4246 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4247 : SDValue();
4248 case Intrinsic::loongarch_lasx_xvstelm_b:
4249 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4250 !isUInt<5>(Op.getConstantOperandVal(5)))
4251 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4252 : SDValue();
4253 case Intrinsic::loongarch_lsx_vstelm_b:
4254 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4255 !isUInt<4>(Op.getConstantOperandVal(5)))
4256 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4257 : SDValue();
4258 case Intrinsic::loongarch_lasx_xvstelm_h:
4259 return (!isShiftedInt<8, 1>(
4260 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4261 !isUInt<4>(Op.getConstantOperandVal(5)))
4262 ? emitIntrinsicErrorMessage(
4263 Op, "argument out of range or not a multiple of 2", DAG)
4264 : SDValue();
4265 case Intrinsic::loongarch_lsx_vstelm_h:
4266 return (!isShiftedInt<8, 1>(
4267 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4268 !isUInt<3>(Op.getConstantOperandVal(5)))
4269 ? emitIntrinsicErrorMessage(
4270 Op, "argument out of range or not a multiple of 2", DAG)
4271 : SDValue();
4272 case Intrinsic::loongarch_lasx_xvstelm_w:
4273 return (!isShiftedInt<8, 2>(
4274 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4275 !isUInt<3>(Op.getConstantOperandVal(5)))
4276 ? emitIntrinsicErrorMessage(
4277 Op, "argument out of range or not a multiple of 4", DAG)
4278 : SDValue();
4279 case Intrinsic::loongarch_lsx_vstelm_w:
4280 return (!isShiftedInt<8, 2>(
4281 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4282 !isUInt<2>(Op.getConstantOperandVal(5)))
4283 ? emitIntrinsicErrorMessage(
4284 Op, "argument out of range or not a multiple of 4", DAG)
4285 : SDValue();
4286 case Intrinsic::loongarch_lasx_xvstelm_d:
4287 return (!isShiftedInt<8, 3>(
4288 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4289 !isUInt<2>(Op.getConstantOperandVal(5)))
4290 ? emitIntrinsicErrorMessage(
4291 Op, "argument out of range or not a multiple of 8", DAG)
4292 : SDValue();
4293 case Intrinsic::loongarch_lsx_vstelm_d:
4294 return (!isShiftedInt<8, 3>(
4295 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4296 !isUInt<1>(Op.getConstantOperandVal(5)))
4297 ? emitIntrinsicErrorMessage(
4298 Op, "argument out of range or not a multiple of 8", DAG)
4299 : SDValue();
4300 }
4301}
4302
4303SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4304 SelectionDAG &DAG) const {
4305 SDLoc DL(Op);
4306 SDValue Lo = Op.getOperand(0);
4307 SDValue Hi = Op.getOperand(1);
4308 SDValue Shamt = Op.getOperand(2);
4309 EVT VT = Lo.getValueType();
4310
4311 // if Shamt-GRLen < 0: // Shamt < GRLen
4312 // Lo = Lo << Shamt
4313 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4314 // else:
4315 // Lo = 0
4316 // Hi = Lo << (Shamt-GRLen)
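 // Illustrative numbers (not part of the upstream comment): on LA32
 // (GRLen = 32), Shamt = 40 takes the second branch, giving
 //   Hi = Lo << 8, Lo = 0,
 // while Shamt = 4 takes the first branch, giving
 //   Lo = Lo << 4, Hi = (Hi << 4) | (Lo >>u 28).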
4317
4318 SDValue Zero = DAG.getConstant(0, DL, VT);
4319 SDValue One = DAG.getConstant(1, DL, VT);
4320 SDValue MinusGRLen =
4321 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4322 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4323 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4324 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4325
4326 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4327 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4328 SDValue ShiftRightLo =
4329 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4330 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4331 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4332 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4333
4334 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4335
4336 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4337 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4338
4339 SDValue Parts[2] = {Lo, Hi};
4340 return DAG.getMergeValues(Parts, DL);
4341}
4342
4343SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4344 SelectionDAG &DAG,
4345 bool IsSRA) const {
4346 SDLoc DL(Op);
4347 SDValue Lo = Op.getOperand(0);
4348 SDValue Hi = Op.getOperand(1);
4349 SDValue Shamt = Op.getOperand(2);
4350 EVT VT = Lo.getValueType();
4351
4352 // SRA expansion:
4353 // if Shamt-GRLen < 0: // Shamt < GRLen
4354 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4355 // Hi = Hi >>s Shamt
4356 // else:
4357 // Lo = Hi >>s (Shamt-GRLen);
4358 // Hi = Hi >>s (GRLen-1)
4359 //
4360 // SRL expansion:
4361 // if Shamt-GRLen < 0: // Shamt < GRLen
4362 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4363 // Hi = Hi >>u Shamt
4364 // else:
4365 // Lo = Hi >>u (Shamt-GRLen);
4366 // Hi = 0;
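 // Illustrative numbers (not part of the upstream comment): on LA32
 // (GRLen = 32), an SRL with Shamt = 4 gives
 //   Lo = (Lo >>u 4) | (Hi << 28), Hi = Hi >>u 4,
 // while Shamt = 40 gives Lo = Hi >>u 8 and Hi = 0 (Hi >>s 31 for SRA).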
4367
4368 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4369
4370 SDValue Zero = DAG.getConstant(0, DL, VT);
4371 SDValue One = DAG.getConstant(1, DL, VT);
4372 SDValue MinusGRLen =
4373 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4374 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4375 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4376 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4377
4378 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4379 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4380 SDValue ShiftLeftHi =
4381 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4382 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4383 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4384 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4385 SDValue HiFalse =
4386 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4387
4388 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4389
4390 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4391 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4392
4393 SDValue Parts[2] = {Lo, Hi};
4394 return DAG.getMergeValues(Parts, DL);
4395}
4396
4397// Returns the opcode of the target-specific SDNode that implements the 32-bit
4398// form of the given Opcode.
4399static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4400 switch (Opcode) {
4401 default:
4402 llvm_unreachable("Unexpected opcode");
4403 case ISD::SDIV:
4404 return LoongArchISD::DIV_W;
4405 case ISD::UDIV:
4406 return LoongArchISD::DIV_WU;
4407 case ISD::SREM:
4408 return LoongArchISD::MOD_W;
4409 case ISD::UREM:
4410 return LoongArchISD::MOD_WU;
4411 case ISD::SHL:
4412 return LoongArchISD::SLL_W;
4413 case ISD::SRA:
4414 return LoongArchISD::SRA_W;
4415 case ISD::SRL:
4416 return LoongArchISD::SRL_W;
4417 case ISD::ROTL:
4418 case ISD::ROTR:
4419 return LoongArchISD::ROTR_W;
4420 case ISD::CTTZ:
4421 return LoongArchISD::CTZ_W;
4422 case ISD::CTLZ:
4423 return LoongArchISD::CLZ_W;
4424 }
4425}
4426
4427// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4428// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4429// otherwise be promoted to i64, making it difficult to select the
4430 // SLL_W/.../*W later on, because the fact that the operation was originally
4431 // of type i8/i16/i32 is lost.
4432static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4433 unsigned ExtOpc = ISD::ANY_EXTEND) {
4434 SDLoc DL(N);
4435 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4436 SDValue NewOp0, NewRes;
4437
4438 switch (NumOp) {
4439 default:
4440 llvm_unreachable("Unexpected NumOp");
4441 case 1: {
4442 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4443 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4444 break;
4445 }
4446 case 2: {
4447 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4448 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4449 if (N->getOpcode() == ISD::ROTL) {
4450 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4451 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4452 }
4453 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4454 break;
4455 }
4456 // TODO: Handle more NumOp.
4457 }
4458
4459 // ReplaceNodeResults requires we maintain the same type for the return
4460 // value.
4461 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4462}
4463
4464 // Converts the given 32-bit operation to an i64 operation with sign-extension
4465 // semantics, to reduce the number of sign-extension instructions.
4466static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4467 SDLoc DL(N);
4468 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4469 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4470 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4471 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4472 DAG.getValueType(MVT::i32));
4473 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4474}
4475
4476 // Helper function that emits an error message for intrinsics with or without
4477 // chain and replaces the results with an UNDEF and, optionally, the chain.
4478static void emitErrorAndReplaceIntrinsicResults(
4479 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4480 StringRef ErrorMsg, bool WithChain = true) {
4481 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4482 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4483 if (!WithChain)
4484 return;
4485 Results.push_back(N->getOperand(0));
4486}
4487
4488template <unsigned N>
4489static void
4490replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4491 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4492 unsigned ResOp) {
4493 const StringRef ErrorMsgOOR = "argument out of range";
4494 unsigned Imm = Node->getConstantOperandVal(2);
4495 if (!isUInt<N>(Imm)) {
4496 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4497 /*WithChain=*/false);
4498 return;
4499 }
4500 SDLoc DL(Node);
4501 SDValue Vec = Node->getOperand(1);
4502
4503 SDValue PickElt =
4504 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4505 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4506 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4507 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4508 PickElt.getValue(0)));
4509}
4510
4511static void replaceVecCondBranchResults(SDNode *N,
4512 SmallVectorImpl<SDValue> &Results,
4513 SelectionDAG &DAG,
4514 const LoongArchSubtarget &Subtarget,
4515 unsigned ResOp) {
4516 SDLoc DL(N);
4517 SDValue Vec = N->getOperand(1);
4518
4519 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4520 Results.push_back(
4521 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4522}
4523
4524static void
4525replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4526 SelectionDAG &DAG,
4527 const LoongArchSubtarget &Subtarget) {
4528 switch (N->getConstantOperandVal(0)) {
4529 default:
4530 llvm_unreachable("Unexpected Intrinsic.");
4531 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4532 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4533 LoongArchISD::VPICK_SEXT_ELT);
4534 break;
4535 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4536 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4537 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4538 LoongArchISD::VPICK_SEXT_ELT);
4539 break;
4540 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4541 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4542 LoongArchISD::VPICK_SEXT_ELT);
4543 break;
4544 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4545 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4546 LoongArchISD::VPICK_ZEXT_ELT);
4547 break;
4548 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4549 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4550 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4551 LoongArchISD::VPICK_ZEXT_ELT);
4552 break;
4553 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4554 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4555 LoongArchISD::VPICK_ZEXT_ELT);
4556 break;
4557 case Intrinsic::loongarch_lsx_bz_b:
4558 case Intrinsic::loongarch_lsx_bz_h:
4559 case Intrinsic::loongarch_lsx_bz_w:
4560 case Intrinsic::loongarch_lsx_bz_d:
4561 case Intrinsic::loongarch_lasx_xbz_b:
4562 case Intrinsic::loongarch_lasx_xbz_h:
4563 case Intrinsic::loongarch_lasx_xbz_w:
4564 case Intrinsic::loongarch_lasx_xbz_d:
4565 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4566 LoongArchISD::VANY_ZERO);
4567 break;
4568 case Intrinsic::loongarch_lsx_bz_v:
4569 case Intrinsic::loongarch_lasx_xbz_v:
4570 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4571 LoongArchISD::VALL_ZERO);
4572 break;
4573 case Intrinsic::loongarch_lsx_bnz_b:
4574 case Intrinsic::loongarch_lsx_bnz_h:
4575 case Intrinsic::loongarch_lsx_bnz_w:
4576 case Intrinsic::loongarch_lsx_bnz_d:
4577 case Intrinsic::loongarch_lasx_xbnz_b:
4578 case Intrinsic::loongarch_lasx_xbnz_h:
4579 case Intrinsic::loongarch_lasx_xbnz_w:
4580 case Intrinsic::loongarch_lasx_xbnz_d:
4581 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4582 LoongArchISD::VALL_NONZERO);
4583 break;
4584 case Intrinsic::loongarch_lsx_bnz_v:
4585 case Intrinsic::loongarch_lasx_xbnz_v:
4586 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4587 LoongArchISD::VANY_NONZERO);
4588 break;
4589 }
4590}
4591
4592static void replaceCMP_XCHG_128Results(SDNode *N,
4593 SmallVectorImpl<SDValue> &Results,
4594 SelectionDAG &DAG) {
4595 assert(N->getValueType(0) == MVT::i128 &&
4596 "AtomicCmpSwap on types less than 128 should be legal");
4597 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4598
4599 unsigned Opcode;
4600 switch (MemOp->getMergedOrdering()) {
4601 case AtomicOrdering::Acquire:
4602 case AtomicOrdering::AcquireRelease:
4603 case AtomicOrdering::SequentiallyConsistent:
4604 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4605 break;
4606 case AtomicOrdering::Monotonic:
4607 case AtomicOrdering::Release:
4608 Opcode = LoongArch::PseudoCmpXchg128;
4609 break;
4610 default:
4611 llvm_unreachable("Unexpected ordering!");
4612 }
4613
4614 SDLoc DL(N);
4615 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4616 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4617 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4618 NewVal.first, NewVal.second, N->getOperand(0)};
4619
4620 SDNode *CmpSwap = DAG.getMachineNode(
4621 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4622 Ops);
4623 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4624 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4625 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4626 Results.push_back(SDValue(CmpSwap, 3));
4627}
4628
4629void LoongArchTargetLowering::ReplaceNodeResults(
4630 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4631 SDLoc DL(N);
4632 EVT VT = N->getValueType(0);
4633 switch (N->getOpcode()) {
4634 default:
4635 llvm_unreachable("Don't know how to legalize this operation");
4636 case ISD::ADD:
4637 case ISD::SUB:
4638 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4639 "Unexpected custom legalisation");
4640 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4641 break;
4642 case ISD::SDIV:
4643 case ISD::UDIV:
4644 case ISD::SREM:
4645 case ISD::UREM:
4646 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4647 "Unexpected custom legalisation");
4648 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4649 Subtarget.hasDiv32() && VT == MVT::i32
4650 ? ISD::ANY_EXTEND
4651 : ISD::SIGN_EXTEND));
4652 break;
4653 case ISD::SHL:
4654 case ISD::SRA:
4655 case ISD::SRL:
4656 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4657 "Unexpected custom legalisation");
4658 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4659 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4660 break;
4661 }
4662 break;
4663 case ISD::ROTL:
4664 case ISD::ROTR:
4665 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4666 "Unexpected custom legalisation");
4667 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4668 break;
4669 case ISD::FP_TO_SINT: {
4670 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4671 "Unexpected custom legalisation");
4672 SDValue Src = N->getOperand(0);
4673 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4674 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4675 TargetLowering::TypeSoftenFloat) {
4676 if (!isTypeLegal(Src.getValueType()))
4677 return;
4678 if (Src.getValueType() == MVT::f16)
4679 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4680 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4681 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4682 return;
4683 }
4684 // If the FP type needs to be softened, emit a library call using the 'si'
4685 // version. If we left it to default legalization we'd end up with 'di'.
4686 RTLIB::Libcall LC;
4687 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4688 MakeLibCallOptions CallOptions;
4689 EVT OpVT = Src.getValueType();
4690 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4691 SDValue Chain = SDValue();
4692 SDValue Result;
4693 std::tie(Result, Chain) =
4694 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4695 Results.push_back(Result);
4696 break;
4697 }
4698 case ISD::BITCAST: {
4699 SDValue Src = N->getOperand(0);
4700 EVT SrcVT = Src.getValueType();
4701 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4702 Subtarget.hasBasicF()) {
4703 SDValue Dst =
4704 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4705 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4706 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4707 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4708 DAG.getVTList(MVT::i32, MVT::i32), Src);
4709 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4710 NewReg.getValue(0), NewReg.getValue(1));
4711 Results.push_back(RetReg);
4712 }
4713 break;
4714 }
4715 case ISD::FP_TO_UINT: {
4716 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4717 "Unexpected custom legalisation");
4718 auto &TLI = DAG.getTargetLoweringInfo();
4719 SDValue Tmp1, Tmp2;
4720 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4721 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4722 break;
4723 }
4724 case ISD::BSWAP: {
4725 SDValue Src = N->getOperand(0);
4726 assert((VT == MVT::i16 || VT == MVT::i32) &&
4727 "Unexpected custom legalization");
4728 MVT GRLenVT = Subtarget.getGRLenVT();
4729 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4730 SDValue Tmp;
4731 switch (VT.getSizeInBits()) {
4732 default:
4733 llvm_unreachable("Unexpected operand width");
4734 case 16:
4735 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4736 break;
4737 case 32:
4738 // Only LA64 will get here due to the size mismatch between VT and
4739 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4740 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4741 break;
4742 }
4743 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4744 break;
4745 }
4746 case ISD::BITREVERSE: {
4747 SDValue Src = N->getOperand(0);
4748 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4749 "Unexpected custom legalization");
4750 MVT GRLenVT = Subtarget.getGRLenVT();
4751 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4752 SDValue Tmp;
4753 switch (VT.getSizeInBits()) {
4754 default:
4755 llvm_unreachable("Unexpected operand width");
4756 case 8:
4757 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4758 break;
4759 case 32:
4760 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4761 break;
4762 }
4763 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4764 break;
4765 }
4766 case ISD::CTLZ:
4767 case ISD::CTTZ: {
4768 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4769 "Unexpected custom legalisation");
4770 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4771 break;
4772 }
4773 case ISD::INTRINSIC_W_CHAIN: {
4774 SDValue Chain = N->getOperand(0);
4775 SDValue Op2 = N->getOperand(2);
4776 MVT GRLenVT = Subtarget.getGRLenVT();
4777 const StringRef ErrorMsgOOR = "argument out of range";
4778 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4779 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4780
4781 switch (N->getConstantOperandVal(1)) {
4782 default:
4783 llvm_unreachable("Unexpected Intrinsic.");
4784 case Intrinsic::loongarch_movfcsr2gr: {
4785 if (!Subtarget.hasBasicF()) {
4786 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4787 return;
4788 }
4789 unsigned Imm = Op2->getAsZExtVal();
4790 if (!isUInt<2>(Imm)) {
4791 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4792 return;
4793 }
4794 SDValue MOVFCSR2GRResults = DAG.getNode(
4795 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4796 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4797 Results.push_back(
4798 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4799 Results.push_back(MOVFCSR2GRResults.getValue(1));
4800 break;
4801 }
4802#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4803 case Intrinsic::loongarch_##NAME: { \
4804 SDValue NODE = DAG.getNode( \
4805 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4806 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4807 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4808 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4809 Results.push_back(NODE.getValue(1)); \
4810 break; \
4811 }
4812 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4813 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4814 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4815 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4816 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4817 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4818#undef CRC_CASE_EXT_BINARYOP
4819
4820#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4821 case Intrinsic::loongarch_##NAME: { \
4822 SDValue NODE = DAG.getNode( \
4823 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4824 {Chain, Op2, \
4825 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4826 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4827 Results.push_back(NODE.getValue(1)); \
4828 break; \
4829 }
4830 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4831 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4832#undef CRC_CASE_EXT_UNARYOP
4833#define CSR_CASE(ID) \
4834 case Intrinsic::loongarch_##ID: { \
4835 if (!Subtarget.is64Bit()) \
4836 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4837 break; \
4838 }
4839 CSR_CASE(csrrd_d);
4840 CSR_CASE(csrwr_d);
4841 CSR_CASE(csrxchg_d);
4842 CSR_CASE(iocsrrd_d);
4843#undef CSR_CASE
4844 case Intrinsic::loongarch_csrrd_w: {
4845 unsigned Imm = Op2->getAsZExtVal();
4846 if (!isUInt<14>(Imm)) {
4847 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4848 return;
4849 }
4850 SDValue CSRRDResults =
4851 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4852 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4853 Results.push_back(
4854 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4855 Results.push_back(CSRRDResults.getValue(1));
4856 break;
4857 }
4858 case Intrinsic::loongarch_csrwr_w: {
4859 unsigned Imm = N->getConstantOperandVal(3);
4860 if (!isUInt<14>(Imm)) {
4861 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4862 return;
4863 }
4864 SDValue CSRWRResults =
4865 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4866 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4867 DAG.getConstant(Imm, DL, GRLenVT)});
4868 Results.push_back(
4869 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4870 Results.push_back(CSRWRResults.getValue(1));
4871 break;
4872 }
4873 case Intrinsic::loongarch_csrxchg_w: {
4874 unsigned Imm = N->getConstantOperandVal(4);
4875 if (!isUInt<14>(Imm)) {
4876 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4877 return;
4878 }
4879 SDValue CSRXCHGResults = DAG.getNode(
4880 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4881 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4882 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4883 DAG.getConstant(Imm, DL, GRLenVT)});
4884 Results.push_back(
4885 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4886 Results.push_back(CSRXCHGResults.getValue(1));
4887 break;
4888 }
4889#define IOCSRRD_CASE(NAME, NODE) \
4890 case Intrinsic::loongarch_##NAME: { \
4891 SDValue IOCSRRDResults = \
4892 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4893 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4894 Results.push_back( \
4895 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4896 Results.push_back(IOCSRRDResults.getValue(1)); \
4897 break; \
4898 }
4899 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4900 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4901 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4902#undef IOCSRRD_CASE
4903 case Intrinsic::loongarch_cpucfg: {
4904 SDValue CPUCFGResults =
4905 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4906 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4907 Results.push_back(
4908 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4909 Results.push_back(CPUCFGResults.getValue(1));
4910 break;
4911 }
4912 case Intrinsic::loongarch_lddir_d: {
4913 if (!Subtarget.is64Bit()) {
4914 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4915 return;
4916 }
4917 break;
4918 }
4919 }
4920 break;
4921 }
4922 case ISD::READ_REGISTER: {
4923 if (Subtarget.is64Bit())
4924 DAG.getContext()->emitError(
4925 "On LA64, only 64-bit registers can be read.");
4926 else
4927 DAG.getContext()->emitError(
4928 "On LA32, only 32-bit registers can be read.");
4929 Results.push_back(DAG.getUNDEF(VT));
4930 Results.push_back(N->getOperand(0));
4931 break;
4932 }
4933 case ISD::INTRINSIC_WO_CHAIN: {
4934 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4935 break;
4936 }
4937 case ISD::LROUND: {
4938 SDValue Op0 = N->getOperand(0);
4939 EVT OpVT = Op0.getValueType();
4940 RTLIB::Libcall LC =
4941 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4942 MakeLibCallOptions CallOptions;
4943 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4944 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4945 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4946 Results.push_back(Result);
4947 break;
4948 }
4949 case ISD::ATOMIC_CMP_SWAP: {
4951 break;
4952 }
4953 case ISD::TRUNCATE: {
4954 MVT VT = N->getSimpleValueType(0);
4955 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4956 return;
4957
4958 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4959 SDValue In = N->getOperand(0);
4960 EVT InVT = In.getValueType();
4961 EVT InEltVT = InVT.getVectorElementType();
4962 EVT EltVT = VT.getVectorElementType();
4963 unsigned MinElts = VT.getVectorNumElements();
4964 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4965 unsigned InBits = InVT.getSizeInBits();
4966
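 // The truncate below is performed as a shuffle over a 128-bit view of the
 // input: every Scale-th element is selected, e.g. (illustrative example,
 // not from the source) truncating v2i32 to v2i16 picks elements {0, 2} of
 // the input bitcast to v8i16.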
4967 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4968 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4969 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4970 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4971 for (unsigned I = 0; I < MinElts; ++I)
4972 TruncMask[I] = Scale * I;
4973
4974 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4975 MVT SVT = In.getSimpleValueType().getScalarType();
4976 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4977 SDValue WidenIn =
4978 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4979 DAG.getVectorIdxConstant(0, DL));
4980 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4981 "Illegal vector type in truncation");
4982 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4983 Results.push_back(
4984 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4985 return;
4986 }
4987 }
4988
4989 break;
4990 }
4991 }
4992}
4993
4994static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4995 TargetLowering::DAGCombinerInfo &DCI,
4996 const LoongArchSubtarget &Subtarget) {
4997 if (DCI.isBeforeLegalizeOps())
4998 return SDValue();
4999
5000 SDValue FirstOperand = N->getOperand(0);
5001 SDValue SecondOperand = N->getOperand(1);
5002 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5003 EVT ValTy = N->getValueType(0);
5004 SDLoc DL(N);
5005 uint64_t lsb, msb;
5006 unsigned SMIdx, SMLen;
5007 ConstantSDNode *CN;
5008 SDValue NewOperand;
5009 MVT GRLenVT = Subtarget.getGRLenVT();
5010
5011 // BSTRPICK requires the 32S feature.
5012 if (!Subtarget.has32S())
5013 return SDValue();
5014
5015 // Op's second operand must be a shifted mask.
5016 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5017 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5018 return SDValue();
5019
5020 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5021 // Pattern match BSTRPICK.
5022 // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
5023 // => BSTRPICK $dst, $src, msb, lsb
5024 // where msb = lsb + len - 1
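 // e.g. (illustrative only, not from the source): on LA64,
 //   $dst = and (srl $src, 8), 0xffff
 // has SMIdx = 0, SMLen = 16 and lsb = 8, so it is rewritten to
 //   BSTRPICK $dst, $src, 23, 8    (msb = 8 + 16 - 1)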
5025
5026 // The second operand of the shift must be an immediate.
5027 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5028 return SDValue();
5029
5030 lsb = CN->getZExtValue();
5031
5032 // Return if the shifted mask does not start at bit 0 or the sum of its
5033 // length and lsb exceeds the word's size.
5034 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5035 return SDValue();
5036
5037 NewOperand = FirstOperand.getOperand(0);
5038 } else {
5039 // Pattern match BSTRPICK.
5040 // $dst = and $src, (2**len - 1), if len > 12
5041 // => BSTRPICK $dst, $src, msb, lsb
5042 // where lsb = 0 and msb = len - 1
5043
5044 // If the mask is <= 0xfff, andi can be used instead.
5045 if (CN->getZExtValue() <= 0xfff)
5046 return SDValue();
5047
5048 // Return if the MSB exceeds the word's size.
5049 if (SMIdx + SMLen > ValTy.getSizeInBits())
5050 return SDValue();
5051
5052 if (SMIdx > 0) {
5053 // Omit if the constant has more than 2 uses. This is a conservative
5054 // decision. Whether it is a win depends on the HW microarchitecture.
5055 // However, it should always be better for 1 and 2 uses.
5056 if (CN->use_size() > 2)
5057 return SDValue();
5058 // Return if the constant can be materialized with a single LU12I.W.
5059 if ((CN->getZExtValue() & 0xfff) == 0)
5060 return SDValue();
5061 // Return if the constant can be materialized with a single ADDI using
5062 // the zero register.
5063 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5064 return SDValue();
5065 }
5066
5067 lsb = SMIdx;
5068 NewOperand = FirstOperand;
5069 }
5070
5071 msb = lsb + SMLen - 1;
5072 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5073 DAG.getConstant(msb, DL, GRLenVT),
5074 DAG.getConstant(lsb, DL, GRLenVT));
5075 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5076 return NR0;
5077 // Try to optimize to
5078 // bstrpick $Rd, $Rs, msb, lsb
5079 // slli $Rd, $Rd, lsb
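 // e.g. (illustrative only, not from the source): and $src, 0x7ff8 is a
 // shifted mask with SMIdx = 3 and SMLen = 12 that cannot be formed by a
 // single andi/lu12i.w/addi, so it becomes
 //   bstrpick $Rd, $Rs, 14, 3
 //   slli     $Rd, $Rd, 3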
5080 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5081 DAG.getConstant(lsb, DL, GRLenVT));
5082}
5083
5084static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
5085 TargetLowering::DAGCombinerInfo &DCI,
5086 const LoongArchSubtarget &Subtarget) {
5087 // BSTRPICK requires the 32S feature.
5088 if (!Subtarget.has32S())
5089 return SDValue();
5090
5091 if (DCI.isBeforeLegalizeOps())
5092 return SDValue();
5093
5094 // $dst = srl (and $src, Mask), Shamt
5095 // =>
5096 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5097 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5098 //
5099
5100 SDValue FirstOperand = N->getOperand(0);
5101 ConstantSDNode *CN;
5102 EVT ValTy = N->getValueType(0);
5103 SDLoc DL(N);
5104 MVT GRLenVT = Subtarget.getGRLenVT();
5105 unsigned MaskIdx, MaskLen;
5106 uint64_t Shamt;
5107
5108 // The first operand must be an AND and the second operand of the AND must be
5109 // a shifted mask.
5110 if (FirstOperand.getOpcode() != ISD::AND ||
5111 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5112 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5113 return SDValue();
5114
5115 // The second operand (shift amount) must be an immediate.
5116 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5117 return SDValue();
5118
5119 Shamt = CN->getZExtValue();
5120 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5121 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5122 FirstOperand->getOperand(0),
5123 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5124 DAG.getConstant(Shamt, DL, GRLenVT));
5125
5126 return SDValue();
5127}
5128
5129// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5130// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
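// e.g. (illustrative): for Src = (and (setcc v8i32 ...), (freeze (setcc
// v8i32 ...))), a query with Size = 256 returns true, since both compare
// inputs are 256-bit vectors.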
5131static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5132 unsigned Depth) {
5133 // Limit recursion.
5134 if (Depth >= SelectionDAG::MaxRecursionDepth)
5135 return false;
5136 switch (Src.getOpcode()) {
5137 case ISD::SETCC:
5138 case ISD::TRUNCATE:
5139 return Src.getOperand(0).getValueSizeInBits() == Size;
5140 case ISD::FREEZE:
5141 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5142 case ISD::AND:
5143 case ISD::XOR:
5144 case ISD::OR:
5145 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5146 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5147 case ISD::SELECT:
5148 case ISD::VSELECT:
5149 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5150 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5151 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5152 case ISD::BUILD_VECTOR:
5153 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5154 ISD::isBuildVectorAllOnes(Src.getNode());
5155 }
5156 return false;
5157}
5158
5159// Helper to push sign extension of vXi1 SETCC result through bitops.
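// e.g. (illustrative): pushing a v8i32 sign extension through
//   (xor (setcc ...), (setcc ...))
// yields (xor (sign_extend v8i32 ...), (sign_extend v8i32 ...)), keeping the
// bitop at the wider element width instead of on the vXi1 result.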
5160static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5161 SDValue Src, const SDLoc &DL) {
5162 switch (Src.getOpcode()) {
5163 case ISD::SETCC:
5164 case ISD::FREEZE:
5165 case ISD::TRUNCATE:
5166 case ISD::BUILD_VECTOR:
5167 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5168 case ISD::AND:
5169 case ISD::XOR:
5170 case ISD::OR:
5171 return DAG.getNode(
5172 Src.getOpcode(), DL, SExtVT,
5173 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5174 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5175 case ISD::SELECT:
5176 case ISD::VSELECT:
5177 return DAG.getSelect(
5178 DL, SExtVT, Src.getOperand(0),
5179 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5180 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5181 }
5182 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5183}
5184
5185static SDValue
5186performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5187 TargetLowering::DAGCombinerInfo &DCI,
5188 const LoongArchSubtarget &Subtarget) {
5189 SDLoc DL(N);
5190 EVT VT = N->getValueType(0);
5191 SDValue Src = N->getOperand(0);
5192 EVT SrcVT = Src.getValueType();
5193
5194 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5195 return SDValue();
5196
5197 bool UseLASX;
5198 unsigned Opc = ISD::DELETED_NODE;
5199 EVT CmpVT = Src.getOperand(0).getValueType();
5200 EVT EltVT = CmpVT.getVectorElementType();
5201
5202 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5203 UseLASX = false;
5204 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5205 CmpVT.getSizeInBits() == 256)
5206 UseLASX = true;
5207 else
5208 return SDValue();
5209
5210 SDValue SrcN1 = Src.getOperand(1);
5211 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5212 default:
5213 break;
5214 case ISD::SETEQ:
5215 // x == 0 => not (vmsknez.b x)
5216 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5218 break;
5219 case ISD::SETGT:
5220 // x > -1 => vmskgez.b x
5221 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5223 break;
5224 case ISD::SETGE:
5225 // x >= 0 => vmskgez.b x
5226 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5228 break;
5229 case ISD::SETLT:
5230 // x < 0 => vmskltz.{b,h,w,d} x
5231 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5232 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5233 EltVT == MVT::i64))
5235 break;
5236 case ISD::SETLE:
5237 // x <= -1 => vmskltz.{b,h,w,d} x
5238 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5239 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5240 EltVT == MVT::i64))
5242 break;
5243 case ISD::SETNE:
5244 // x != 0 => vmsknez.b x
5245 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5247 break;
5248 }
5249
5250 if (Opc == ISD::DELETED_NODE)
5251 return SDValue();
5252
5253 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5255 V = DAG.getZExtOrTrunc(V, DL, T);
5256 return DAG.getBitcast(VT, V);
5257}
5258
5259static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5260 TargetLowering::DAGCombinerInfo &DCI,
5261 const LoongArchSubtarget &Subtarget) {
5262 SDLoc DL(N);
5263 EVT VT = N->getValueType(0);
5264 SDValue Src = N->getOperand(0);
5265 EVT SrcVT = Src.getValueType();
5266 MVT GRLenVT = Subtarget.getGRLenVT();
5267
5268 if (!DCI.isBeforeLegalizeOps())
5269 return SDValue();
5270
5271 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5272 return SDValue();
5273
5274 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5275 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5276 if (Res)
5277 return Res;
5278
5279 // Generate vXi1 using [X]VMSKLTZ
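 // e.g. (illustrative): bitcasting a v16i1 compare result to i16
 // sign-extends it to v16i8 and collects the sign bits into a GPR with
 // VMSKLTZ.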
5280 MVT SExtVT;
5281 unsigned Opc;
5282 bool UseLASX = false;
5283 bool PropagateSExt = false;
5284
5285 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5286 EVT CmpVT = Src.getOperand(0).getValueType();
5287 if (CmpVT.getSizeInBits() > 256)
5288 return SDValue();
5289 }
5290
5291 switch (SrcVT.getSimpleVT().SimpleTy) {
5292 default:
5293 return SDValue();
5294 case MVT::v2i1:
5295 SExtVT = MVT::v2i64;
5296 break;
5297 case MVT::v4i1:
5298 SExtVT = MVT::v4i32;
5299 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5300 SExtVT = MVT::v4i64;
5301 UseLASX = true;
5302 PropagateSExt = true;
5303 }
5304 break;
5305 case MVT::v8i1:
5306 SExtVT = MVT::v8i16;
5307 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5308 SExtVT = MVT::v8i32;
5309 UseLASX = true;
5310 PropagateSExt = true;
5311 }
5312 break;
5313 case MVT::v16i1:
5314 SExtVT = MVT::v16i8;
5315 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5316 SExtVT = MVT::v16i16;
5317 UseLASX = true;
5318 PropagateSExt = true;
5319 }
5320 break;
5321 case MVT::v32i1:
5322 SExtVT = MVT::v32i8;
5323 UseLASX = true;
5324 break;
5325 };
5326 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5327 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5328
5329 SDValue V;
5330 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5331 if (Src.getSimpleValueType() == MVT::v32i8) {
5332 SDValue Lo, Hi;
5333 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5334 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5335 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5336 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5337 DAG.getConstant(16, DL, MVT::i8));
5338 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5339 } else if (UseLASX) {
5340 return SDValue();
5341 }
5342 }
5343
5344 if (!V) {
5346 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5347 }
5348
5350 V = DAG.getZExtOrTrunc(V, DL, T);
5351 return DAG.getBitcast(VT, V);
5352}
5353
5354static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5355 TargetLowering::DAGCombinerInfo &DCI,
5356 const LoongArchSubtarget &Subtarget) {
5357 MVT GRLenVT = Subtarget.getGRLenVT();
5358 EVT ValTy = N->getValueType(0);
5359 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5360 ConstantSDNode *CN0, *CN1;
5361 SDLoc DL(N);
5362 unsigned ValBits = ValTy.getSizeInBits();
5363 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5364 unsigned Shamt;
5365 bool SwapAndRetried = false;
5366
5367 // BSTRPICK requires the 32S feature.
5368 if (!Subtarget.has32S())
5369 return SDValue();
5370
5371 if (DCI.isBeforeLegalizeOps())
5372 return SDValue();
5373
5374 if (ValBits != 32 && ValBits != 64)
5375 return SDValue();
5376
5377Retry:
5378 // 1st pattern to match BSTRINS:
5379 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5380 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5381 // =>
5382 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
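 // e.g. (illustrative, 64-bit): with size = 8 and lsb = 16,
 //   R = or (and X, ~0xff0000), (and (shl Y, 16), 0xff0000)
 // becomes R = BSTRINS X, Y, 23, 16.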
5383 if (N0.getOpcode() == ISD::AND &&
5384 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5385 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5386 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5387 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5388 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5389 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5390 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5391 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5392 (MaskIdx0 + MaskLen0 <= ValBits)) {
5393 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5394 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5395 N1.getOperand(0).getOperand(0),
5396 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5397 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5398 }
5399
5400 // 2nd pattern to match BSTRINS:
5401 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5402 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5403 // =>
5404 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5405 if (N0.getOpcode() == ISD::AND &&
5406 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5407 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5408 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5409 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5410 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5411 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5412 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5413 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5414 (MaskIdx0 + MaskLen0 <= ValBits)) {
5415 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5416 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5417 N1.getOperand(0).getOperand(0),
5418 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5419 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5420 }
5421
5422 // 3rd pattern to match BSTRINS:
5423 // R = or (and X, mask0), (and Y, mask1)
5424 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5425 // =>
5426 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5427 // where msb = lsb + size - 1
5428 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5429 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5430 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5431 (MaskIdx0 + MaskLen0 <= 64) &&
5432 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5433 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5434 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5435 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5436 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5437 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5438 DAG.getConstant(ValBits == 32
5439 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5440 : (MaskIdx0 + MaskLen0 - 1),
5441 DL, GRLenVT),
5442 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5443 }
5444
5445 // 4th pattern to match BSTRINS:
5446 // R = or (and X, mask), (shl Y, shamt)
5447 // where mask = (2**shamt - 1)
5448 // =>
5449 // R = BSTRINS X, Y, ValBits - 1, shamt
5450 // where ValBits = 32 or 64
5451 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5452 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5453 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5454 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5455 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5456 (MaskIdx0 + MaskLen0 <= ValBits)) {
5457 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5458 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5459 N1.getOperand(0),
5460 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5461 DAG.getConstant(Shamt, DL, GRLenVT));
5462 }
5463
5464 // 5th pattern to match BSTRINS:
5465 // R = or (and X, mask), const
5466 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5467 // =>
5468 // R = BSTRINS X, (const >> lsb), msb, lsb
5469 // where msb = lsb + size - 1
5470 if (N0.getOpcode() == ISD::AND &&
5471 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5472 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5473 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5474 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5475 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5476 return DAG.getNode(
5477 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5478 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5479 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5480 : (MaskIdx0 + MaskLen0 - 1),
5481 DL, GRLenVT),
5482 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5483 }
5484
5485 // 6th pattern.
5486 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5487 // by the incoming bits are known to be zero.
5488 // =>
5489 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5490 //
5491 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5492 // pattern is more common than the 1st. So we put the 1st before the 6th in
5493 // order to match as many nodes as possible.
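 // e.g. (illustrative): if bits [23:16] of b are known to be zero,
 //   a = b | ((c & 0xff) << 16)
 // becomes a = BSTRINS b, c, 23, 16.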
5494 ConstantSDNode *CNMask, *CNShamt;
5495 unsigned MaskIdx, MaskLen;
5496 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5497 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5498 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5499 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5500 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5501 Shamt = CNShamt->getZExtValue();
5502 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5503 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5504 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5505 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5506 N1.getOperand(0).getOperand(0),
5507 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5508 DAG.getConstant(Shamt, DL, GRLenVT));
5509 }
5510 }
5511
5512 // 7th pattern.
5513 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5514 // overwritten by the incoming bits are known to be zero.
5515 // =>
5516 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5517 //
5518 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5519 // before the 7th in order to match as many nodes as possible.
5520 if (N1.getOpcode() == ISD::AND &&
5521 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5522 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5523 N1.getOperand(0).getOpcode() == ISD::SHL &&
5524 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5525 CNShamt->getZExtValue() == MaskIdx) {
5526 APInt ShMask(ValBits, CNMask->getZExtValue());
5527 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5528 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5529 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5530 N1.getOperand(0).getOperand(0),
5531 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5532 DAG.getConstant(MaskIdx, DL, GRLenVT));
5533 }
5534 }
5535
5536 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5537 if (!SwapAndRetried) {
5538 std::swap(N0, N1);
5539 SwapAndRetried = true;
5540 goto Retry;
5541 }
5542
5543 SwapAndRetried = false;
5544Retry2:
5545 // 8th pattern.
5546 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5547 // the incoming bits are known to be zero.
5548 // =>
5549 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5550 //
5551 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5552 // we put it here in order to match as many nodes as possible or generate
5553 // fewer instructions.
5554 if (N1.getOpcode() == ISD::AND &&
5555 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5556 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5557 APInt ShMask(ValBits, CNMask->getZExtValue());
5558 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5559 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5560 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5561 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5562 N1->getOperand(0),
5563 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5564 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5565 DAG.getConstant(MaskIdx, DL, GRLenVT));
5566 }
5567 }
5568 // Swap N0/N1 and retry.
5569 if (!SwapAndRetried) {
5570 std::swap(N0, N1);
5571 SwapAndRetried = true;
5572 goto Retry2;
5573 }
5574
5575 return SDValue();
5576}
5577
5578static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5579 ExtType = ISD::NON_EXTLOAD;
5580
5581 switch (V.getNode()->getOpcode()) {
5582 case ISD::LOAD: {
5583 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5584 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5585 (LoadNode->getMemoryVT() == MVT::i16)) {
5586 ExtType = LoadNode->getExtensionType();
5587 return true;
5588 }
5589 return false;
5590 }
5591 case ISD::AssertSext: {
5592 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5593 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5594 ExtType = ISD::SEXTLOAD;
5595 return true;
5596 }
5597 return false;
5598 }
5599 case ISD::AssertZext: {
5600 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5601 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5602 ExtType = ISD::ZEXTLOAD;
5603 return true;
5604 }
5605 return false;
5606 }
5607 default:
5608 return false;
5609 }
5610
5611 return false;
5612}
5613
5614// Eliminate redundant truncation and zero-extension nodes.
5615// * Case 1:
5616// +------------+ +------------+ +------------+
5617// | Input1 | | Input2 | | CC |
5618// +------------+ +------------+ +------------+
5619// | | |
5620// V V +----+
5621// +------------+ +------------+ |
5622// | TRUNCATE | | TRUNCATE | |
5623// +------------+ +------------+ |
5624// | | |
5625// V V |
5626// +------------+ +------------+ |
5627// | ZERO_EXT | | ZERO_EXT | |
5628// +------------+ +------------+ |
5629// | | |
5630// | +-------------+ |
5631// V V | |
5632// +----------------+ | |
5633// | AND | | |
5634// +----------------+ | |
5635// | | |
5636// +---------------+ | |
5637// | | |
5638// V V V
5639// +-------------+
5640// | CMP |
5641// +-------------+
5642// * Case 2:
5643// +------------+ +------------+ +-------------+ +------------+ +------------+
5644// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5645// +------------+ +------------+ +-------------+ +------------+ +------------+
5646// | | | | |
5647// V | | | |
5648// +------------+ | | | |
5649// | XOR |<---------------------+ | |
5650// +------------+ | | |
5651// | | | |
5652// V V +---------------+ |
5653// +------------+ +------------+ | |
5654// | TRUNCATE | | TRUNCATE | | +-------------------------+
5655// +------------+ +------------+ | |
5656// | | | |
5657// V V | |
5658// +------------+ +------------+ | |
5659// | ZERO_EXT | | ZERO_EXT | | |
5660// +------------+ +------------+ | |
5661// | | | |
5662// V V | |
5663// +----------------+ | |
5664// | AND | | |
5665// +----------------+ | |
5666// | | |
5667// +---------------+ | |
5668// | | |
5669// V V V
5670// +-------------+
5671// | CMP |
5672// +-------------+
5673static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5674 TargetLowering::DAGCombinerInfo &DCI,
5675 const LoongArchSubtarget &Subtarget) {
5676 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5677
5678 SDNode *AndNode = N->getOperand(0).getNode();
5679 if (AndNode->getOpcode() != ISD::AND)
5680 return SDValue();
5681
5682 SDValue AndInputValue2 = AndNode->getOperand(1);
5683 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5684 return SDValue();
5685
5686 SDValue CmpInputValue = N->getOperand(1);
5687 SDValue AndInputValue1 = AndNode->getOperand(0);
5688 if (AndInputValue1.getOpcode() == ISD::XOR) {
5689 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5690 return SDValue();
5691 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5692 if (!CN || CN->getSExtValue() != -1)
5693 return SDValue();
5694 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5695 if (!CN || CN->getSExtValue() != 0)
5696 return SDValue();
5697 AndInputValue1 = AndInputValue1.getOperand(0);
5698 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5699 return SDValue();
5700 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5701 if (AndInputValue2 != CmpInputValue)
5702 return SDValue();
5703 } else {
5704 return SDValue();
5705 }
5706
5707 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5708 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5709 return SDValue();
5710
5711 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5712 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5713 return SDValue();
5714
5715 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5716 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5717 ISD::LoadExtType ExtType1;
5718 ISD::LoadExtType ExtType2;
5719
5720 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5721 !checkValueWidth(TruncInputValue2, ExtType2))
5722 return SDValue();
5723
5724 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5725 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5726 return SDValue();
5727
5728 if ((ExtType2 != ISD::ZEXTLOAD) &&
5729 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5730 return SDValue();
5731
5732 // These truncation and zero-extension nodes are not necessary; remove them.
5733 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5734 TruncInputValue1, TruncInputValue2);
5735 SDValue NewSetCC =
5736 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5737 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5738 return SDValue(N, 0);
5739}
5740
5741// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5742static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5743 TargetLowering::DAGCombinerInfo &DCI,
5744 const LoongArchSubtarget &Subtarget) {
5745 if (DCI.isBeforeLegalizeOps())
5746 return SDValue();
5747
5748 SDValue Src = N->getOperand(0);
5749 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5750 return SDValue();
5751
5752 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5753 Src.getOperand(0));
5754}
5755
5756// Perform common combines for BR_CC and SELECT_CC conditions.
5757static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5758 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5759 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5760
5761 // Since an arithmetic right shift always preserves the sign bit, the
5762 // shift can be omitted when comparing against zero.
5763 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5764 // setge (sra X, N), 0 -> setge X, 0
5765 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5766 LHS.getOpcode() == ISD::SRA) {
5767 LHS = LHS.getOperand(0);
5768 return true;
5769 }
5770
5771 if (!ISD::isIntEqualitySetCC(CCVal))
5772 return false;
5773
5774 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5775 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5776 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5777 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5778 // If we're looking for eq 0 instead of ne 0, we need to invert the
5779 // condition.
5780 bool Invert = CCVal == ISD::SETEQ;
5781 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5782 if (Invert)
5783 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5784
5785 RHS = LHS.getOperand(1);
5786 LHS = LHS.getOperand(0);
5787 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5788
5789 CC = DAG.getCondCode(CCVal);
5790 return true;
5791 }
5792
5793 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
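  // e.g. (illustrative, GRLen = 64, C = 3):
  //   ((srl (and X, 8), 3), 0, eq) -> ((shl X, 60), 0, ge)
  // since bit 3 of X becomes the sign bit after the left shift.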
5794 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5795 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5796 SDValue LHS0 = LHS.getOperand(0);
5797 if (LHS0.getOpcode() == ISD::AND &&
5798 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5799 uint64_t Mask = LHS0.getConstantOperandVal(1);
5800 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5801 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5802 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5803 CC = DAG.getCondCode(CCVal);
5804
5805 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5806 LHS = LHS0.getOperand(0);
5807 if (ShAmt != 0)
5808 LHS =
5809 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5810 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5811 return true;
5812 }
5813 }
5814 }
5815
5816 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5817 // This can occur when legalizing some floating point comparisons.
5818 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5819 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5820 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5821 CC = DAG.getCondCode(CCVal);
5822 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5823 return true;
5824 }
5825
5826 return false;
5827}
5828
5829static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5830 TargetLowering::DAGCombinerInfo &DCI,
5831 const LoongArchSubtarget &Subtarget) {
5832 SDValue LHS = N->getOperand(1);
5833 SDValue RHS = N->getOperand(2);
5834 SDValue CC = N->getOperand(3);
5835 SDLoc DL(N);
5836
5837 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5838 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5839 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5840
5841 return SDValue();
5842}
5843
5844static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5845 TargetLowering::DAGCombinerInfo &DCI,
5846 const LoongArchSubtarget &Subtarget) {
5847 // Transform
5848 SDValue LHS = N->getOperand(0);
5849 SDValue RHS = N->getOperand(1);
5850 SDValue CC = N->getOperand(2);
5851 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5852 SDValue TrueV = N->getOperand(3);
5853 SDValue FalseV = N->getOperand(4);
5854 SDLoc DL(N);
5855 EVT VT = N->getValueType(0);
5856
5857 // If the True and False values are the same, we don't need a select_cc.
5858 if (TrueV == FalseV)
5859 return TrueV;
5860
5861 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5862 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
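 // e.g. (illustrative, GRLen = 64): select (x < 0), 5, 2
 //   -> (sra x, 63) & (5 - 2) + 2, which is 5 when x < 0 and 2 otherwise.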
5863 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5864 isNullConstant(RHS) &&
5865 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5866 if (CCVal == ISD::CondCode::SETGE)
5867 std::swap(TrueV, FalseV);
5868
5869 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5870 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5871 // Only handle simm12; values outside this range are better materialized
5872 // in a register.
5873 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5874 isInt<12>(TrueSImm - FalseSImm)) {
5875 SDValue SRA =
5876 DAG.getNode(ISD::SRA, DL, VT, LHS,
5877 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5878 SDValue AND =
5879 DAG.getNode(ISD::AND, DL, VT, SRA,
5880 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5881 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5882 }
5883
5884 if (CCVal == ISD::CondCode::SETGE)
5885 std::swap(TrueV, FalseV);
5886 }
5887
5888 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5889 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5890 {LHS, RHS, CC, TrueV, FalseV});
5891
5892 return SDValue();
5893}
5894
5895template <unsigned N>
5896static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5897 SelectionDAG &DAG,
5898 const LoongArchSubtarget &Subtarget,
5899 bool IsSigned = false) {
5900 SDLoc DL(Node);
5901 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5902 // Check the ImmArg.
5903 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5904 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5905 DAG.getContext()->emitError(Node->getOperationName(0) +
5906 ": argument out of range.");
5907 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5908 }
5909 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5910}
5911
5912template <unsigned N>
5913static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5914 SelectionDAG &DAG, bool IsSigned = false) {
5915 SDLoc DL(Node);
5916 EVT ResTy = Node->getValueType(0);
5917 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5918
5919 // Check the ImmArg.
5920 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5921 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5922 DAG.getContext()->emitError(Node->getOperationName(0) +
5923 ": argument out of range.");
5924 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5925 }
5926 return DAG.getConstant(
5927 APInt(ResTy.getScalarSizeInBits(),
5928 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5929 DL, ResTy);
5930}
5931
5932static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5933 SDLoc DL(Node);
5934 EVT ResTy = Node->getValueType(0);
5935 SDValue Vec = Node->getOperand(2);
5936 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5937 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5938}
5939
5940static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5941 SDLoc DL(Node);
5942 EVT ResTy = Node->getValueType(0);
5943 SDValue One = DAG.getConstant(1, DL, ResTy);
5944 SDValue Bit =
5945 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5946
5947 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5948 DAG.getNOT(DL, Bit, ResTy));
5949}
5950
5951template <unsigned N>
5952static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5953 SDLoc DL(Node);
5954 EVT ResTy = Node->getValueType(0);
5955 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5956 // Check the unsigned ImmArg.
5957 if (!isUInt<N>(CImm->getZExtValue())) {
5958 DAG.getContext()->emitError(Node->getOperationName(0) +
5959 ": argument out of range.");
5960 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5961 }
5962
5963 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5964 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5965
5966 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5967}
5968
5969template <unsigned N>
5970static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5971 SDLoc DL(Node);
5972 EVT ResTy = Node->getValueType(0);
5973 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5974 // Check the unsigned ImmArg.
5975 if (!isUInt<N>(CImm->getZExtValue())) {
5976 DAG.getContext()->emitError(Node->getOperationName(0) +
5977 ": argument out of range.");
5978 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5979 }
5980
5981 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5982 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5983 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5984}
5985
5986template <unsigned N>
5987static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5988 SDLoc DL(Node);
5989 EVT ResTy = Node->getValueType(0);
5990 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5991 // Check the unsigned ImmArg.
5992 if (!isUInt<N>(CImm->getZExtValue())) {
5993 DAG.getContext()->emitError(Node->getOperationName(0) +
5994 ": argument out of range.");
5995 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5996 }
5997
5998 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5999 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6000 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6001}
6002
6003template <unsigned W>
6005 unsigned ResOp) {
6006 unsigned Imm = N->getConstantOperandVal(2);
6007 if (!isUInt<W>(Imm)) {
6008 const StringRef ErrorMsg = "argument out of range";
6009 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6010 return DAG.getUNDEF(N->getValueType(0));
6011 }
6012 SDLoc DL(N);
6013 SDValue Vec = N->getOperand(1);
6014 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6016 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6017}
6018
6019static SDValue
6020performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6021 TargetLowering::DAGCombinerInfo &DCI,
6022 const LoongArchSubtarget &Subtarget) {
6023 SDLoc DL(N);
6024 switch (N->getConstantOperandVal(0)) {
6025 default:
6026 break;
6027 case Intrinsic::loongarch_lsx_vadd_b:
6028 case Intrinsic::loongarch_lsx_vadd_h:
6029 case Intrinsic::loongarch_lsx_vadd_w:
6030 case Intrinsic::loongarch_lsx_vadd_d:
6031 case Intrinsic::loongarch_lasx_xvadd_b:
6032 case Intrinsic::loongarch_lasx_xvadd_h:
6033 case Intrinsic::loongarch_lasx_xvadd_w:
6034 case Intrinsic::loongarch_lasx_xvadd_d:
6035 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6036 N->getOperand(2));
6037 case Intrinsic::loongarch_lsx_vaddi_bu:
6038 case Intrinsic::loongarch_lsx_vaddi_hu:
6039 case Intrinsic::loongarch_lsx_vaddi_wu:
6040 case Intrinsic::loongarch_lsx_vaddi_du:
6041 case Intrinsic::loongarch_lasx_xvaddi_bu:
6042 case Intrinsic::loongarch_lasx_xvaddi_hu:
6043 case Intrinsic::loongarch_lasx_xvaddi_wu:
6044 case Intrinsic::loongarch_lasx_xvaddi_du:
6045 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6046 lowerVectorSplatImm<5>(N, 2, DAG));
6047 case Intrinsic::loongarch_lsx_vsub_b:
6048 case Intrinsic::loongarch_lsx_vsub_h:
6049 case Intrinsic::loongarch_lsx_vsub_w:
6050 case Intrinsic::loongarch_lsx_vsub_d:
6051 case Intrinsic::loongarch_lasx_xvsub_b:
6052 case Intrinsic::loongarch_lasx_xvsub_h:
6053 case Intrinsic::loongarch_lasx_xvsub_w:
6054 case Intrinsic::loongarch_lasx_xvsub_d:
6055 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6056 N->getOperand(2));
6057 case Intrinsic::loongarch_lsx_vsubi_bu:
6058 case Intrinsic::loongarch_lsx_vsubi_hu:
6059 case Intrinsic::loongarch_lsx_vsubi_wu:
6060 case Intrinsic::loongarch_lsx_vsubi_du:
6061 case Intrinsic::loongarch_lasx_xvsubi_bu:
6062 case Intrinsic::loongarch_lasx_xvsubi_hu:
6063 case Intrinsic::loongarch_lasx_xvsubi_wu:
6064 case Intrinsic::loongarch_lasx_xvsubi_du:
6065 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6066 lowerVectorSplatImm<5>(N, 2, DAG));
6067 case Intrinsic::loongarch_lsx_vneg_b:
6068 case Intrinsic::loongarch_lsx_vneg_h:
6069 case Intrinsic::loongarch_lsx_vneg_w:
6070 case Intrinsic::loongarch_lsx_vneg_d:
6071 case Intrinsic::loongarch_lasx_xvneg_b:
6072 case Intrinsic::loongarch_lasx_xvneg_h:
6073 case Intrinsic::loongarch_lasx_xvneg_w:
6074 case Intrinsic::loongarch_lasx_xvneg_d:
6075 return DAG.getNode(
6076 ISD::SUB, DL, N->getValueType(0),
6077 DAG.getConstant(
6078 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6079 /*isSigned=*/true),
6080 SDLoc(N), N->getValueType(0)),
6081 N->getOperand(1));
6082 case Intrinsic::loongarch_lsx_vmax_b:
6083 case Intrinsic::loongarch_lsx_vmax_h:
6084 case Intrinsic::loongarch_lsx_vmax_w:
6085 case Intrinsic::loongarch_lsx_vmax_d:
6086 case Intrinsic::loongarch_lasx_xvmax_b:
6087 case Intrinsic::loongarch_lasx_xvmax_h:
6088 case Intrinsic::loongarch_lasx_xvmax_w:
6089 case Intrinsic::loongarch_lasx_xvmax_d:
6090 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6091 N->getOperand(2));
6092 case Intrinsic::loongarch_lsx_vmax_bu:
6093 case Intrinsic::loongarch_lsx_vmax_hu:
6094 case Intrinsic::loongarch_lsx_vmax_wu:
6095 case Intrinsic::loongarch_lsx_vmax_du:
6096 case Intrinsic::loongarch_lasx_xvmax_bu:
6097 case Intrinsic::loongarch_lasx_xvmax_hu:
6098 case Intrinsic::loongarch_lasx_xvmax_wu:
6099 case Intrinsic::loongarch_lasx_xvmax_du:
6100 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6101 N->getOperand(2));
6102 case Intrinsic::loongarch_lsx_vmaxi_b:
6103 case Intrinsic::loongarch_lsx_vmaxi_h:
6104 case Intrinsic::loongarch_lsx_vmaxi_w:
6105 case Intrinsic::loongarch_lsx_vmaxi_d:
6106 case Intrinsic::loongarch_lasx_xvmaxi_b:
6107 case Intrinsic::loongarch_lasx_xvmaxi_h:
6108 case Intrinsic::loongarch_lasx_xvmaxi_w:
6109 case Intrinsic::loongarch_lasx_xvmaxi_d:
6110 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6111 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6112 case Intrinsic::loongarch_lsx_vmaxi_bu:
6113 case Intrinsic::loongarch_lsx_vmaxi_hu:
6114 case Intrinsic::loongarch_lsx_vmaxi_wu:
6115 case Intrinsic::loongarch_lsx_vmaxi_du:
6116 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6117 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6118 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6119 case Intrinsic::loongarch_lasx_xvmaxi_du:
6120 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6121 lowerVectorSplatImm<5>(N, 2, DAG));
6122 case Intrinsic::loongarch_lsx_vmin_b:
6123 case Intrinsic::loongarch_lsx_vmin_h:
6124 case Intrinsic::loongarch_lsx_vmin_w:
6125 case Intrinsic::loongarch_lsx_vmin_d:
6126 case Intrinsic::loongarch_lasx_xvmin_b:
6127 case Intrinsic::loongarch_lasx_xvmin_h:
6128 case Intrinsic::loongarch_lasx_xvmin_w:
6129 case Intrinsic::loongarch_lasx_xvmin_d:
6130 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6131 N->getOperand(2));
6132 case Intrinsic::loongarch_lsx_vmin_bu:
6133 case Intrinsic::loongarch_lsx_vmin_hu:
6134 case Intrinsic::loongarch_lsx_vmin_wu:
6135 case Intrinsic::loongarch_lsx_vmin_du:
6136 case Intrinsic::loongarch_lasx_xvmin_bu:
6137 case Intrinsic::loongarch_lasx_xvmin_hu:
6138 case Intrinsic::loongarch_lasx_xvmin_wu:
6139 case Intrinsic::loongarch_lasx_xvmin_du:
6140 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6141 N->getOperand(2));
6142 case Intrinsic::loongarch_lsx_vmini_b:
6143 case Intrinsic::loongarch_lsx_vmini_h:
6144 case Intrinsic::loongarch_lsx_vmini_w:
6145 case Intrinsic::loongarch_lsx_vmini_d:
6146 case Intrinsic::loongarch_lasx_xvmini_b:
6147 case Intrinsic::loongarch_lasx_xvmini_h:
6148 case Intrinsic::loongarch_lasx_xvmini_w:
6149 case Intrinsic::loongarch_lasx_xvmini_d:
6150 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6151 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6152 case Intrinsic::loongarch_lsx_vmini_bu:
6153 case Intrinsic::loongarch_lsx_vmini_hu:
6154 case Intrinsic::loongarch_lsx_vmini_wu:
6155 case Intrinsic::loongarch_lsx_vmini_du:
6156 case Intrinsic::loongarch_lasx_xvmini_bu:
6157 case Intrinsic::loongarch_lasx_xvmini_hu:
6158 case Intrinsic::loongarch_lasx_xvmini_wu:
6159 case Intrinsic::loongarch_lasx_xvmini_du:
6160 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6161 lowerVectorSplatImm<5>(N, 2, DAG));
6162 case Intrinsic::loongarch_lsx_vmul_b:
6163 case Intrinsic::loongarch_lsx_vmul_h:
6164 case Intrinsic::loongarch_lsx_vmul_w:
6165 case Intrinsic::loongarch_lsx_vmul_d:
6166 case Intrinsic::loongarch_lasx_xvmul_b:
6167 case Intrinsic::loongarch_lasx_xvmul_h:
6168 case Intrinsic::loongarch_lasx_xvmul_w:
6169 case Intrinsic::loongarch_lasx_xvmul_d:
6170 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6171 N->getOperand(2));
6172 case Intrinsic::loongarch_lsx_vmadd_b:
6173 case Intrinsic::loongarch_lsx_vmadd_h:
6174 case Intrinsic::loongarch_lsx_vmadd_w:
6175 case Intrinsic::loongarch_lsx_vmadd_d:
6176 case Intrinsic::loongarch_lasx_xvmadd_b:
6177 case Intrinsic::loongarch_lasx_xvmadd_h:
6178 case Intrinsic::loongarch_lasx_xvmadd_w:
6179 case Intrinsic::loongarch_lasx_xvmadd_d: {
6180 EVT ResTy = N->getValueType(0);
6181 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6182 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6183 N->getOperand(3)));
6184 }
6185 case Intrinsic::loongarch_lsx_vmsub_b:
6186 case Intrinsic::loongarch_lsx_vmsub_h:
6187 case Intrinsic::loongarch_lsx_vmsub_w:
6188 case Intrinsic::loongarch_lsx_vmsub_d:
6189 case Intrinsic::loongarch_lasx_xvmsub_b:
6190 case Intrinsic::loongarch_lasx_xvmsub_h:
6191 case Intrinsic::loongarch_lasx_xvmsub_w:
6192 case Intrinsic::loongarch_lasx_xvmsub_d: {
6193 EVT ResTy = N->getValueType(0);
6194 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6195 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6196 N->getOperand(3)));
6197 }
6198 case Intrinsic::loongarch_lsx_vdiv_b:
6199 case Intrinsic::loongarch_lsx_vdiv_h:
6200 case Intrinsic::loongarch_lsx_vdiv_w:
6201 case Intrinsic::loongarch_lsx_vdiv_d:
6202 case Intrinsic::loongarch_lasx_xvdiv_b:
6203 case Intrinsic::loongarch_lasx_xvdiv_h:
6204 case Intrinsic::loongarch_lasx_xvdiv_w:
6205 case Intrinsic::loongarch_lasx_xvdiv_d:
6206 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6207 N->getOperand(2));
6208 case Intrinsic::loongarch_lsx_vdiv_bu:
6209 case Intrinsic::loongarch_lsx_vdiv_hu:
6210 case Intrinsic::loongarch_lsx_vdiv_wu:
6211 case Intrinsic::loongarch_lsx_vdiv_du:
6212 case Intrinsic::loongarch_lasx_xvdiv_bu:
6213 case Intrinsic::loongarch_lasx_xvdiv_hu:
6214 case Intrinsic::loongarch_lasx_xvdiv_wu:
6215 case Intrinsic::loongarch_lasx_xvdiv_du:
6216 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6217 N->getOperand(2));
6218 case Intrinsic::loongarch_lsx_vmod_b:
6219 case Intrinsic::loongarch_lsx_vmod_h:
6220 case Intrinsic::loongarch_lsx_vmod_w:
6221 case Intrinsic::loongarch_lsx_vmod_d:
6222 case Intrinsic::loongarch_lasx_xvmod_b:
6223 case Intrinsic::loongarch_lasx_xvmod_h:
6224 case Intrinsic::loongarch_lasx_xvmod_w:
6225 case Intrinsic::loongarch_lasx_xvmod_d:
6226 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6227 N->getOperand(2));
6228 case Intrinsic::loongarch_lsx_vmod_bu:
6229 case Intrinsic::loongarch_lsx_vmod_hu:
6230 case Intrinsic::loongarch_lsx_vmod_wu:
6231 case Intrinsic::loongarch_lsx_vmod_du:
6232 case Intrinsic::loongarch_lasx_xvmod_bu:
6233 case Intrinsic::loongarch_lasx_xvmod_hu:
6234 case Intrinsic::loongarch_lasx_xvmod_wu:
6235 case Intrinsic::loongarch_lasx_xvmod_du:
6236 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6237 N->getOperand(2));
6238 case Intrinsic::loongarch_lsx_vand_v:
6239 case Intrinsic::loongarch_lasx_xvand_v:
6240 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6241 N->getOperand(2));
6242 case Intrinsic::loongarch_lsx_vor_v:
6243 case Intrinsic::loongarch_lasx_xvor_v:
6244 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6245 N->getOperand(2));
6246 case Intrinsic::loongarch_lsx_vxor_v:
6247 case Intrinsic::loongarch_lasx_xvxor_v:
6248 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6249 N->getOperand(2));
6250 case Intrinsic::loongarch_lsx_vnor_v:
6251 case Intrinsic::loongarch_lasx_xvnor_v: {
6252 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6253 N->getOperand(2));
6254 return DAG.getNOT(DL, Res, Res->getValueType(0));
6255 }
6256 case Intrinsic::loongarch_lsx_vandi_b:
6257 case Intrinsic::loongarch_lasx_xvandi_b:
6258 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6259 lowerVectorSplatImm<8>(N, 2, DAG));
6260 case Intrinsic::loongarch_lsx_vori_b:
6261 case Intrinsic::loongarch_lasx_xvori_b:
6262 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6263 lowerVectorSplatImm<8>(N, 2, DAG));
6264 case Intrinsic::loongarch_lsx_vxori_b:
6265 case Intrinsic::loongarch_lasx_xvxori_b:
6266 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6267 lowerVectorSplatImm<8>(N, 2, DAG));
6268 case Intrinsic::loongarch_lsx_vsll_b:
6269 case Intrinsic::loongarch_lsx_vsll_h:
6270 case Intrinsic::loongarch_lsx_vsll_w:
6271 case Intrinsic::loongarch_lsx_vsll_d:
6272 case Intrinsic::loongarch_lasx_xvsll_b:
6273 case Intrinsic::loongarch_lasx_xvsll_h:
6274 case Intrinsic::loongarch_lasx_xvsll_w:
6275 case Intrinsic::loongarch_lasx_xvsll_d:
6276 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6277 truncateVecElts(N, DAG));
6278 case Intrinsic::loongarch_lsx_vslli_b:
6279 case Intrinsic::loongarch_lasx_xvslli_b:
6280 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6281 lowerVectorSplatImm<3>(N, 2, DAG));
6282 case Intrinsic::loongarch_lsx_vslli_h:
6283 case Intrinsic::loongarch_lasx_xvslli_h:
6284 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6285 lowerVectorSplatImm<4>(N, 2, DAG));
6286 case Intrinsic::loongarch_lsx_vslli_w:
6287 case Intrinsic::loongarch_lasx_xvslli_w:
6288 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6289 lowerVectorSplatImm<5>(N, 2, DAG));
6290 case Intrinsic::loongarch_lsx_vslli_d:
6291 case Intrinsic::loongarch_lasx_xvslli_d:
6292 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6293 lowerVectorSplatImm<6>(N, 2, DAG));
6294 case Intrinsic::loongarch_lsx_vsrl_b:
6295 case Intrinsic::loongarch_lsx_vsrl_h:
6296 case Intrinsic::loongarch_lsx_vsrl_w:
6297 case Intrinsic::loongarch_lsx_vsrl_d:
6298 case Intrinsic::loongarch_lasx_xvsrl_b:
6299 case Intrinsic::loongarch_lasx_xvsrl_h:
6300 case Intrinsic::loongarch_lasx_xvsrl_w:
6301 case Intrinsic::loongarch_lasx_xvsrl_d:
6302 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6303 truncateVecElts(N, DAG));
6304 case Intrinsic::loongarch_lsx_vsrli_b:
6305 case Intrinsic::loongarch_lasx_xvsrli_b:
6306 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6307 lowerVectorSplatImm<3>(N, 2, DAG));
6308 case Intrinsic::loongarch_lsx_vsrli_h:
6309 case Intrinsic::loongarch_lasx_xvsrli_h:
6310 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6311 lowerVectorSplatImm<4>(N, 2, DAG));
6312 case Intrinsic::loongarch_lsx_vsrli_w:
6313 case Intrinsic::loongarch_lasx_xvsrli_w:
6314 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6315 lowerVectorSplatImm<5>(N, 2, DAG));
6316 case Intrinsic::loongarch_lsx_vsrli_d:
6317 case Intrinsic::loongarch_lasx_xvsrli_d:
6318 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6319 lowerVectorSplatImm<6>(N, 2, DAG));
6320 case Intrinsic::loongarch_lsx_vsra_b:
6321 case Intrinsic::loongarch_lsx_vsra_h:
6322 case Intrinsic::loongarch_lsx_vsra_w:
6323 case Intrinsic::loongarch_lsx_vsra_d:
6324 case Intrinsic::loongarch_lasx_xvsra_b:
6325 case Intrinsic::loongarch_lasx_xvsra_h:
6326 case Intrinsic::loongarch_lasx_xvsra_w:
6327 case Intrinsic::loongarch_lasx_xvsra_d:
6328 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6329 truncateVecElts(N, DAG));
6330 case Intrinsic::loongarch_lsx_vsrai_b:
6331 case Intrinsic::loongarch_lasx_xvsrai_b:
6332 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6333 lowerVectorSplatImm<3>(N, 2, DAG));
6334 case Intrinsic::loongarch_lsx_vsrai_h:
6335 case Intrinsic::loongarch_lasx_xvsrai_h:
6336 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6337 lowerVectorSplatImm<4>(N, 2, DAG));
6338 case Intrinsic::loongarch_lsx_vsrai_w:
6339 case Intrinsic::loongarch_lasx_xvsrai_w:
6340 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6341 lowerVectorSplatImm<5>(N, 2, DAG));
6342 case Intrinsic::loongarch_lsx_vsrai_d:
6343 case Intrinsic::loongarch_lasx_xvsrai_d:
6344 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6345 lowerVectorSplatImm<6>(N, 2, DAG));
6346 case Intrinsic::loongarch_lsx_vclz_b:
6347 case Intrinsic::loongarch_lsx_vclz_h:
6348 case Intrinsic::loongarch_lsx_vclz_w:
6349 case Intrinsic::loongarch_lsx_vclz_d:
6350 case Intrinsic::loongarch_lasx_xvclz_b:
6351 case Intrinsic::loongarch_lasx_xvclz_h:
6352 case Intrinsic::loongarch_lasx_xvclz_w:
6353 case Intrinsic::loongarch_lasx_xvclz_d:
6354 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6355 case Intrinsic::loongarch_lsx_vpcnt_b:
6356 case Intrinsic::loongarch_lsx_vpcnt_h:
6357 case Intrinsic::loongarch_lsx_vpcnt_w:
6358 case Intrinsic::loongarch_lsx_vpcnt_d:
6359 case Intrinsic::loongarch_lasx_xvpcnt_b:
6360 case Intrinsic::loongarch_lasx_xvpcnt_h:
6361 case Intrinsic::loongarch_lasx_xvpcnt_w:
6362 case Intrinsic::loongarch_lasx_xvpcnt_d:
6363 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6364 case Intrinsic::loongarch_lsx_vbitclr_b:
6365 case Intrinsic::loongarch_lsx_vbitclr_h:
6366 case Intrinsic::loongarch_lsx_vbitclr_w:
6367 case Intrinsic::loongarch_lsx_vbitclr_d:
6368 case Intrinsic::loongarch_lasx_xvbitclr_b:
6369 case Intrinsic::loongarch_lasx_xvbitclr_h:
6370 case Intrinsic::loongarch_lasx_xvbitclr_w:
6371 case Intrinsic::loongarch_lasx_xvbitclr_d:
6372 return lowerVectorBitClear(N, DAG);
6373 case Intrinsic::loongarch_lsx_vbitclri_b:
6374 case Intrinsic::loongarch_lasx_xvbitclri_b:
6375 return lowerVectorBitClearImm<3>(N, DAG);
6376 case Intrinsic::loongarch_lsx_vbitclri_h:
6377 case Intrinsic::loongarch_lasx_xvbitclri_h:
6378 return lowerVectorBitClearImm<4>(N, DAG);
6379 case Intrinsic::loongarch_lsx_vbitclri_w:
6380 case Intrinsic::loongarch_lasx_xvbitclri_w:
6381 return lowerVectorBitClearImm<5>(N, DAG);
6382 case Intrinsic::loongarch_lsx_vbitclri_d:
6383 case Intrinsic::loongarch_lasx_xvbitclri_d:
6384 return lowerVectorBitClearImm<6>(N, DAG);
6385 case Intrinsic::loongarch_lsx_vbitset_b:
6386 case Intrinsic::loongarch_lsx_vbitset_h:
6387 case Intrinsic::loongarch_lsx_vbitset_w:
6388 case Intrinsic::loongarch_lsx_vbitset_d:
6389 case Intrinsic::loongarch_lasx_xvbitset_b:
6390 case Intrinsic::loongarch_lasx_xvbitset_h:
6391 case Intrinsic::loongarch_lasx_xvbitset_w:
6392 case Intrinsic::loongarch_lasx_xvbitset_d: {
6393 EVT VecTy = N->getValueType(0);
6394 SDValue One = DAG.getConstant(1, DL, VecTy);
6395 return DAG.getNode(
6396 ISD::OR, DL, VecTy, N->getOperand(1),
6397 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6398 }
6399 case Intrinsic::loongarch_lsx_vbitseti_b:
6400 case Intrinsic::loongarch_lasx_xvbitseti_b:
6401 return lowerVectorBitSetImm<3>(N, DAG);
6402 case Intrinsic::loongarch_lsx_vbitseti_h:
6403 case Intrinsic::loongarch_lasx_xvbitseti_h:
6404 return lowerVectorBitSetImm<4>(N, DAG);
6405 case Intrinsic::loongarch_lsx_vbitseti_w:
6406 case Intrinsic::loongarch_lasx_xvbitseti_w:
6407 return lowerVectorBitSetImm<5>(N, DAG);
6408 case Intrinsic::loongarch_lsx_vbitseti_d:
6409 case Intrinsic::loongarch_lasx_xvbitseti_d:
6410 return lowerVectorBitSetImm<6>(N, DAG);
6411 case Intrinsic::loongarch_lsx_vbitrev_b:
6412 case Intrinsic::loongarch_lsx_vbitrev_h:
6413 case Intrinsic::loongarch_lsx_vbitrev_w:
6414 case Intrinsic::loongarch_lsx_vbitrev_d:
6415 case Intrinsic::loongarch_lasx_xvbitrev_b:
6416 case Intrinsic::loongarch_lasx_xvbitrev_h:
6417 case Intrinsic::loongarch_lasx_xvbitrev_w:
6418 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6419 EVT VecTy = N->getValueType(0);
6420 SDValue One = DAG.getConstant(1, DL, VecTy);
6421 return DAG.getNode(
6422 ISD::XOR, DL, VecTy, N->getOperand(1),
6423 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6424 }
6425 case Intrinsic::loongarch_lsx_vbitrevi_b:
6426 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6427 return lowerVectorBitRevImm<3>(N, DAG);
6428 case Intrinsic::loongarch_lsx_vbitrevi_h:
6429 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6430 return lowerVectorBitRevImm<4>(N, DAG);
6431 case Intrinsic::loongarch_lsx_vbitrevi_w:
6432 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6433 return lowerVectorBitRevImm<5>(N, DAG);
6434 case Intrinsic::loongarch_lsx_vbitrevi_d:
6435 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6436 return lowerVectorBitRevImm<6>(N, DAG);
6437 case Intrinsic::loongarch_lsx_vfadd_s:
6438 case Intrinsic::loongarch_lsx_vfadd_d:
6439 case Intrinsic::loongarch_lasx_xvfadd_s:
6440 case Intrinsic::loongarch_lasx_xvfadd_d:
6441 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6442 N->getOperand(2));
6443 case Intrinsic::loongarch_lsx_vfsub_s:
6444 case Intrinsic::loongarch_lsx_vfsub_d:
6445 case Intrinsic::loongarch_lasx_xvfsub_s:
6446 case Intrinsic::loongarch_lasx_xvfsub_d:
6447 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6448 N->getOperand(2));
6449 case Intrinsic::loongarch_lsx_vfmul_s:
6450 case Intrinsic::loongarch_lsx_vfmul_d:
6451 case Intrinsic::loongarch_lasx_xvfmul_s:
6452 case Intrinsic::loongarch_lasx_xvfmul_d:
6453 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6454 N->getOperand(2));
6455 case Intrinsic::loongarch_lsx_vfdiv_s:
6456 case Intrinsic::loongarch_lsx_vfdiv_d:
6457 case Intrinsic::loongarch_lasx_xvfdiv_s:
6458 case Intrinsic::loongarch_lasx_xvfdiv_d:
6459 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6460 N->getOperand(2));
6461 case Intrinsic::loongarch_lsx_vfmadd_s:
6462 case Intrinsic::loongarch_lsx_vfmadd_d:
6463 case Intrinsic::loongarch_lasx_xvfmadd_s:
6464 case Intrinsic::loongarch_lasx_xvfmadd_d:
6465 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6466 N->getOperand(2), N->getOperand(3));
6467 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6468 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6469 N->getOperand(1), N->getOperand(2),
6470 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6471 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6472 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6473 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6474 N->getOperand(1), N->getOperand(2),
6475 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6476 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6477 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6478 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6479 N->getOperand(1), N->getOperand(2),
6480 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6481 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6482 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6483 N->getOperand(1), N->getOperand(2),
6484 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6485 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6486 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6487 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6488 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6489 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6490 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6491 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6492 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6493 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6494 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6495 N->getOperand(1)));
6496 case Intrinsic::loongarch_lsx_vreplve_b:
6497 case Intrinsic::loongarch_lsx_vreplve_h:
6498 case Intrinsic::loongarch_lsx_vreplve_w:
6499 case Intrinsic::loongarch_lsx_vreplve_d:
6500 case Intrinsic::loongarch_lasx_xvreplve_b:
6501 case Intrinsic::loongarch_lasx_xvreplve_h:
6502 case Intrinsic::loongarch_lasx_xvreplve_w:
6503 case Intrinsic::loongarch_lasx_xvreplve_d:
6504 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6505 N->getOperand(1),
6506 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6507 N->getOperand(2)));
6508 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6509 if (!Subtarget.is64Bit())
6511 break;
6512 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6513 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6514 if (!Subtarget.is64Bit())
6516 break;
6517 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6518 if (!Subtarget.is64Bit())
6520 break;
6521 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6522 if (!Subtarget.is64Bit())
6524 break;
6525 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6526 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6527 if (!Subtarget.is64Bit())
6529 break;
6530 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6531 if (!Subtarget.is64Bit())
6533 break;
6534 case Intrinsic::loongarch_lsx_bz_b:
6535 case Intrinsic::loongarch_lsx_bz_h:
6536 case Intrinsic::loongarch_lsx_bz_w:
6537 case Intrinsic::loongarch_lsx_bz_d:
6538 case Intrinsic::loongarch_lasx_xbz_b:
6539 case Intrinsic::loongarch_lasx_xbz_h:
6540 case Intrinsic::loongarch_lasx_xbz_w:
6541 case Intrinsic::loongarch_lasx_xbz_d:
6542 if (!Subtarget.is64Bit())
6543 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6544 N->getOperand(1));
6545 break;
6546 case Intrinsic::loongarch_lsx_bz_v:
6547 case Intrinsic::loongarch_lasx_xbz_v:
6548 if (!Subtarget.is64Bit())
6549 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6550 N->getOperand(1));
6551 break;
6552 case Intrinsic::loongarch_lsx_bnz_b:
6553 case Intrinsic::loongarch_lsx_bnz_h:
6554 case Intrinsic::loongarch_lsx_bnz_w:
6555 case Intrinsic::loongarch_lsx_bnz_d:
6556 case Intrinsic::loongarch_lasx_xbnz_b:
6557 case Intrinsic::loongarch_lasx_xbnz_h:
6558 case Intrinsic::loongarch_lasx_xbnz_w:
6559 case Intrinsic::loongarch_lasx_xbnz_d:
6560 if (!Subtarget.is64Bit())
6561 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6562 N->getOperand(1));
6563 break;
6564 case Intrinsic::loongarch_lsx_bnz_v:
6565 case Intrinsic::loongarch_lasx_xbnz_v:
6566 if (!Subtarget.is64Bit())
6567 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6568 N->getOperand(1));
6569 break;
6570 }
6571 return SDValue();
6572}
6573
6574 static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6575 TargetLowering::DAGCombinerInfo &DCI,
6576 const LoongArchSubtarget &Subtarget) {
6577 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6578 // conversion is unnecessary and can be replaced with the
6579 // MOVFR2GR_S_LA64 operand.
6580 SDValue Op0 = N->getOperand(0);
6581 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6582 return Op0.getOperand(0);
6583 return SDValue();
6584}
6585
6586 static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6587 TargetLowering::DAGCombinerInfo &DCI,
6588 const LoongArchSubtarget &Subtarget) {
6589 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6590 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6591 // operand.
6592 SDValue Op0 = N->getOperand(0);
6593 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6594 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6595 "Unexpected value type!");
6596 return Op0.getOperand(0);
6597 }
6598 return SDValue();
6599}
6600
6601 static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6602 TargetLowering::DAGCombinerInfo &DCI,
6603 const LoongArchSubtarget &Subtarget) {
6604 MVT VT = N->getSimpleValueType(0);
6605 unsigned NumBits = VT.getScalarSizeInBits();
6606
6607 // Simplify the inputs.
6608 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6609 APInt DemandedMask(APInt::getAllOnes(NumBits));
6610 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6611 return SDValue(N, 0);
6612
6613 return SDValue();
6614}
6615
6616static SDValue
6617 performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6618 TargetLowering::DAGCombinerInfo &DCI,
6619 const LoongArchSubtarget &Subtarget) {
6620 SDValue Op0 = N->getOperand(0);
6621 SDLoc DL(N);
6622
6623 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6624 // redundant. Instead, use BuildPairF64's operands directly.
6625 if (Op0.getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6626 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6627
6628 if (Op0->isUndef()) {
6629 SDValue Lo = DAG.getUNDEF(MVT::i32);
6630 SDValue Hi = DAG.getUNDEF(MVT::i32);
6631 return DCI.CombineTo(N, Lo, Hi);
6632 }
6633
6634 // It's cheaper to materialise two 32-bit integers than to load a double
6635 // from the constant pool and transfer it to integer registers through the
6636 // stack.
6637 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6638 APInt V = C->getValueAPF().bitcastToAPInt();
6639 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6640 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6641 return DCI.CombineTo(N, Lo, Hi);
6642 }
6643
6644 return SDValue();
6645}
6646
6647static SDValue
6648 performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
6649 TargetLowering::DAGCombinerInfo &DCI,
6650 const LoongArchSubtarget &Subtarget) {
6651 if (!DCI.isBeforeLegalize())
6652 return SDValue();
6653
6654 MVT EltVT = N->getSimpleValueType(0);
6655 SDValue Vec = N->getOperand(0);
6656 EVT VecTy = Vec->getValueType(0);
6657 SDValue Idx = N->getOperand(1);
6658 unsigned IdxOp = Idx.getOpcode();
6659 SDLoc DL(N);
6660
6661 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6662 return SDValue();
6663
6664 // Combine:
6665 // t2 = truncate t1
6666 // t3 = {zero/sign/any}_extend t2
6667 // t4 = extract_vector_elt t0, t3
6668 // to:
6669 // t4 = extract_vector_elt t0, t1
6670 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6671 IdxOp == ISD::ANY_EXTEND) {
6672 SDValue IdxOrig = Idx.getOperand(0);
6673 if (IdxOrig.getOpcode() != ISD::TRUNCATE)
6674 return SDValue();
6675
6676 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6677 IdxOrig.getOperand(0));
6678 }
6679
6680 return SDValue();
6681}
6682
6683 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6684 DAGCombinerInfo &DCI) const {
6685 SelectionDAG &DAG = DCI.DAG;
6686 switch (N->getOpcode()) {
6687 default:
6688 break;
6689 case ISD::AND:
6690 return performANDCombine(N, DAG, DCI, Subtarget);
6691 case ISD::OR:
6692 return performORCombine(N, DAG, DCI, Subtarget);
6693 case ISD::SETCC:
6694 return performSETCCCombine(N, DAG, DCI, Subtarget);
6695 case ISD::SRL:
6696 return performSRLCombine(N, DAG, DCI, Subtarget);
6697 case ISD::BITCAST:
6698 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6699 case LoongArchISD::BITREV_W:
6700 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6701 case LoongArchISD::BR_CC:
6702 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6703 case LoongArchISD::SELECT_CC:
6704 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6705 case ISD::INTRINSIC_WO_CHAIN:
6706 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6707 case LoongArchISD::MOVGR2FR_W_LA64:
6708 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6709 case LoongArchISD::MOVFR2GR_S_LA64:
6710 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6711 case LoongArchISD::VMSKLTZ:
6712 case LoongArchISD::XVMSKLTZ:
6713 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6714 case LoongArchISD::SPLIT_PAIR_F64:
6715 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6716 case ISD::EXTRACT_VECTOR_ELT:
6717 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6718 }
6719 return SDValue();
6720}
6721
6724 if (!ZeroDivCheck)
6725 return MBB;
6726
6727 // Build instructions:
6728 // MBB:
6729 // div(or mod) $dst, $dividend, $divisor
6730 // bne $divisor, $zero, SinkMBB
6731 // BreakMBB:
6732 // break 7 // BRK_DIVZERO
6733 // SinkMBB:
6734 // fallthrough
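  // Note that this expansion is only emitted when the
  // -loongarch-check-zero-division flag is set (see ZeroDivCheck above);
  // otherwise the division is left unguarded.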
6735 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6736 MachineFunction::iterator It = ++MBB->getIterator();
6737 MachineFunction *MF = MBB->getParent();
6738 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6739 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6740 MF->insert(It, BreakMBB);
6741 MF->insert(It, SinkMBB);
6742
6743 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6744 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6745 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6746
6747 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6748 DebugLoc DL = MI.getDebugLoc();
6749 MachineOperand &Divisor = MI.getOperand(2);
6750 Register DivisorReg = Divisor.getReg();
6751
6752 // MBB:
6753 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6754 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6755 .addReg(LoongArch::R0)
6756 .addMBB(SinkMBB);
6757 MBB->addSuccessor(BreakMBB);
6758 MBB->addSuccessor(SinkMBB);
6759
6760 // BreakMBB:
6761 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6762 // definition of BRK_DIVZERO.
6763 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6764 BreakMBB->addSuccessor(SinkMBB);
6765
6766 // Clear Divisor's kill flag.
6767 Divisor.setIsKill(false);
6768
6769 return SinkMBB;
6770}
6771
6772static MachineBasicBlock *
6774 const LoongArchSubtarget &Subtarget) {
6775 unsigned CondOpc;
6776 switch (MI.getOpcode()) {
6777 default:
6778 llvm_unreachable("Unexpected opcode");
6779 case LoongArch::PseudoVBZ:
6780 CondOpc = LoongArch::VSETEQZ_V;
6781 break;
6782 case LoongArch::PseudoVBZ_B:
6783 CondOpc = LoongArch::VSETANYEQZ_B;
6784 break;
6785 case LoongArch::PseudoVBZ_H:
6786 CondOpc = LoongArch::VSETANYEQZ_H;
6787 break;
6788 case LoongArch::PseudoVBZ_W:
6789 CondOpc = LoongArch::VSETANYEQZ_W;
6790 break;
6791 case LoongArch::PseudoVBZ_D:
6792 CondOpc = LoongArch::VSETANYEQZ_D;
6793 break;
6794 case LoongArch::PseudoVBNZ:
6795 CondOpc = LoongArch::VSETNEZ_V;
6796 break;
6797 case LoongArch::PseudoVBNZ_B:
6798 CondOpc = LoongArch::VSETALLNEZ_B;
6799 break;
6800 case LoongArch::PseudoVBNZ_H:
6801 CondOpc = LoongArch::VSETALLNEZ_H;
6802 break;
6803 case LoongArch::PseudoVBNZ_W:
6804 CondOpc = LoongArch::VSETALLNEZ_W;
6805 break;
6806 case LoongArch::PseudoVBNZ_D:
6807 CondOpc = LoongArch::VSETALLNEZ_D;
6808 break;
6809 case LoongArch::PseudoXVBZ:
6810 CondOpc = LoongArch::XVSETEQZ_V;
6811 break;
6812 case LoongArch::PseudoXVBZ_B:
6813 CondOpc = LoongArch::XVSETANYEQZ_B;
6814 break;
6815 case LoongArch::PseudoXVBZ_H:
6816 CondOpc = LoongArch::XVSETANYEQZ_H;
6817 break;
6818 case LoongArch::PseudoXVBZ_W:
6819 CondOpc = LoongArch::XVSETANYEQZ_W;
6820 break;
6821 case LoongArch::PseudoXVBZ_D:
6822 CondOpc = LoongArch::XVSETANYEQZ_D;
6823 break;
6824 case LoongArch::PseudoXVBNZ:
6825 CondOpc = LoongArch::XVSETNEZ_V;
6826 break;
6827 case LoongArch::PseudoXVBNZ_B:
6828 CondOpc = LoongArch::XVSETALLNEZ_B;
6829 break;
6830 case LoongArch::PseudoXVBNZ_H:
6831 CondOpc = LoongArch::XVSETALLNEZ_H;
6832 break;
6833 case LoongArch::PseudoXVBNZ_W:
6834 CondOpc = LoongArch::XVSETALLNEZ_W;
6835 break;
6836 case LoongArch::PseudoXVBNZ_D:
6837 CondOpc = LoongArch::XVSETALLNEZ_D;
6838 break;
6839 }
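  // The pseudo expands into a small diamond that materializes 0 or 1 in a GPR
  // from the vector condition, roughly (registers illustrative):
  //   vset*    $fcc0, $vr0        ; CondOpc selected above
  //   bcnez    $fcc0, TrueBB
  // FalseBB:
  //   addi.w   $r, $zero, 0       ; then branch to SinkBB
  // TrueBB:
  //   addi.w   $r, $zero, 1
  // SinkBB:
  //   result = PHI(FalseBB, TrueBB)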
6840
6841 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6842 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6843 DebugLoc DL = MI.getDebugLoc();
6846
6847 MachineFunction *F = BB->getParent();
6848 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6849 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6850 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6851
6852 F->insert(It, FalseBB);
6853 F->insert(It, TrueBB);
6854 F->insert(It, SinkBB);
6855
6856 // Transfer the remainder of MBB and its successor edges to Sink.
6857 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6859
6860 // Insert the real instruction into BB.
6861 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6862 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6863
6864 // Insert branch.
6865 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6866 BB->addSuccessor(FalseBB);
6867 BB->addSuccessor(TrueBB);
6868
6869 // FalseBB.
6870 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6871 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6872 .addReg(LoongArch::R0)
6873 .addImm(0);
6874 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6875 FalseBB->addSuccessor(SinkBB);
6876
6877 // TrueBB.
6878 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6879 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6880 .addReg(LoongArch::R0)
6881 .addImm(1);
6882 TrueBB->addSuccessor(SinkBB);
6883
6884 // SinkBB: merge the results.
6885 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6886 MI.getOperand(0).getReg())
6887 .addReg(RD1)
6888 .addMBB(FalseBB)
6889 .addReg(RD2)
6890 .addMBB(TrueBB);
6891
6892 // The pseudo instruction is gone now.
6893 MI.eraseFromParent();
6894 return SinkBB;
6895}
6896
6897static MachineBasicBlock *
6899 const LoongArchSubtarget &Subtarget) {
6900 unsigned InsOp;
6901 unsigned BroadcastOp;
6902 unsigned HalfSize;
6903 switch (MI.getOpcode()) {
6904 default:
6905 llvm_unreachable("Unexpected opcode");
6906 case LoongArch::PseudoXVINSGR2VR_B:
6907 HalfSize = 16;
6908 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6909 InsOp = LoongArch::XVEXTRINS_B;
6910 break;
6911 case LoongArch::PseudoXVINSGR2VR_H:
6912 HalfSize = 8;
6913 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6914 InsOp = LoongArch::XVEXTRINS_H;
6915 break;
6916 }
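  // Two expansion strategies follow. If the 256-bit source is IMPLICIT_DEF
  // and the index falls in the low half, a plain 128-bit vinsgr2vr on the
  // subregister suffices. Otherwise the element is broadcast with
  // [x]vreplgr2vr, xvpermi.q (imm 18 or 48) routes it to the right half, and
  // xvextrins.{b,h} writes the target lane.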
6917 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6918 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6919 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6920 DebugLoc DL = MI.getDebugLoc();
6922 // XDst = vector_insert XSrc, Elt, Idx
6923 Register XDst = MI.getOperand(0).getReg();
6924 Register XSrc = MI.getOperand(1).getReg();
6925 Register Elt = MI.getOperand(2).getReg();
6926 unsigned Idx = MI.getOperand(3).getImm();
6927
6928 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6929 Idx < HalfSize) {
6930 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6931 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6932
6933 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6934 .addReg(XSrc, 0, LoongArch::sub_128);
6935 BuildMI(*BB, MI, DL,
6936 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6937 : LoongArch::VINSGR2VR_B),
6938 ScratchSubReg2)
6939 .addReg(ScratchSubReg1)
6940 .addReg(Elt)
6941 .addImm(Idx);
6942
6943 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6944 .addImm(0)
6945 .addReg(ScratchSubReg2)
6946 .addImm(LoongArch::sub_128);
6947 } else {
6948 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6949 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6950
6951 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6952
6953 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6954 .addReg(ScratchReg1)
6955 .addReg(XSrc)
6956 .addImm(Idx >= HalfSize ? 48 : 18);
6957
6958 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6959 .addReg(XSrc)
6960 .addReg(ScratchReg2)
6961 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6962 }
6963
6964 MI.eraseFromParent();
6965 return BB;
6966}
6967
6970 const LoongArchSubtarget &Subtarget) {
6971 assert(Subtarget.hasExtLSX());
6972 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6973 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6974 DebugLoc DL = MI.getDebugLoc();
6976 Register Dst = MI.getOperand(0).getReg();
6977 Register Src = MI.getOperand(1).getReg();
6978 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6979 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6980 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6981
6982 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6983 BuildMI(*BB, MI, DL,
6984 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6985 : LoongArch::VINSGR2VR_W),
6986 ScratchReg2)
6987 .addReg(ScratchReg1)
6988 .addReg(Src)
6989 .addImm(0);
6990 BuildMI(
6991 *BB, MI, DL,
6992 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6993 ScratchReg3)
6994 .addReg(ScratchReg2);
6995 BuildMI(*BB, MI, DL,
6996 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6997 : LoongArch::VPICKVE2GR_W),
6998 Dst)
6999 .addReg(ScratchReg3)
7000 .addImm(0);
7001
7002 MI.eraseFromParent();
7003 return BB;
7004}
7005
7006static MachineBasicBlock *
7008 const LoongArchSubtarget &Subtarget) {
7009 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7010 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7011 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7013 Register Dst = MI.getOperand(0).getReg();
7014 Register Src = MI.getOperand(1).getReg();
7015 DebugLoc DL = MI.getDebugLoc();
7016 unsigned EleBits = 8;
7017 unsigned NotOpc = 0;
7018 unsigned MskOpc;
7019
7020 switch (MI.getOpcode()) {
7021 default:
7022 llvm_unreachable("Unexpected opcode");
7023 case LoongArch::PseudoVMSKLTZ_B:
7024 MskOpc = LoongArch::VMSKLTZ_B;
7025 break;
7026 case LoongArch::PseudoVMSKLTZ_H:
7027 MskOpc = LoongArch::VMSKLTZ_H;
7028 EleBits = 16;
7029 break;
7030 case LoongArch::PseudoVMSKLTZ_W:
7031 MskOpc = LoongArch::VMSKLTZ_W;
7032 EleBits = 32;
7033 break;
7034 case LoongArch::PseudoVMSKLTZ_D:
7035 MskOpc = LoongArch::VMSKLTZ_D;
7036 EleBits = 64;
7037 break;
7038 case LoongArch::PseudoVMSKGEZ_B:
7039 MskOpc = LoongArch::VMSKGEZ_B;
7040 break;
7041 case LoongArch::PseudoVMSKEQZ_B:
7042 MskOpc = LoongArch::VMSKNZ_B;
7043 NotOpc = LoongArch::VNOR_V;
7044 break;
7045 case LoongArch::PseudoVMSKNEZ_B:
7046 MskOpc = LoongArch::VMSKNZ_B;
7047 break;
7048 case LoongArch::PseudoXVMSKLTZ_B:
7049 MskOpc = LoongArch::XVMSKLTZ_B;
7050 RC = &LoongArch::LASX256RegClass;
7051 break;
7052 case LoongArch::PseudoXVMSKLTZ_H:
7053 MskOpc = LoongArch::XVMSKLTZ_H;
7054 RC = &LoongArch::LASX256RegClass;
7055 EleBits = 16;
7056 break;
7057 case LoongArch::PseudoXVMSKLTZ_W:
7058 MskOpc = LoongArch::XVMSKLTZ_W;
7059 RC = &LoongArch::LASX256RegClass;
7060 EleBits = 32;
7061 break;
7062 case LoongArch::PseudoXVMSKLTZ_D:
7063 MskOpc = LoongArch::XVMSKLTZ_D;
7064 RC = &LoongArch::LASX256RegClass;
7065 EleBits = 64;
7066 break;
7067 case LoongArch::PseudoXVMSKGEZ_B:
7068 MskOpc = LoongArch::XVMSKGEZ_B;
7069 RC = &LoongArch::LASX256RegClass;
7070 break;
7071 case LoongArch::PseudoXVMSKEQZ_B:
7072 MskOpc = LoongArch::XVMSKNZ_B;
7073 NotOpc = LoongArch::XVNOR_V;
7074 RC = &LoongArch::LASX256RegClass;
7075 break;
7076 case LoongArch::PseudoXVMSKNEZ_B:
7077 MskOpc = LoongArch::XVMSKNZ_B;
7078 RC = &LoongArch::LASX256RegClass;
7079 break;
7080 }
7081
7082 Register Msk = MRI.createVirtualRegister(RC);
7083 if (NotOpc) {
7084 Register Tmp = MRI.createVirtualRegister(RC);
7085 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7086 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7087 .addReg(Tmp, RegState::Kill)
7088 .addReg(Tmp, RegState::Kill);
7089 } else {
7090 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7091 }
7092
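  // For LASX the mask is produced per 128-bit half, so extract both 32-bit
  // words with xvpickve2gr.wu and merge the high word into the low one with
  // bstrins, leaving all 256/EleBits mask bits in Dst.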
7093 if (TRI->getRegSizeInBits(*RC) > 128) {
7094 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7095 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7096 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7097 .addReg(Msk)
7098 .addImm(0);
7099 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7100 .addReg(Msk, RegState::Kill)
7101 .addImm(4);
7102 BuildMI(*BB, MI, DL,
7103 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7104 : LoongArch::BSTRINS_W),
7105 Dst)
7108 .addImm(256 / EleBits - 1)
7109 .addImm(128 / EleBits);
7110 } else {
7111 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7112 .addReg(Msk, RegState::Kill)
7113 .addImm(0);
7114 }
7115
7116 MI.eraseFromParent();
7117 return BB;
7118}
7119
7120static MachineBasicBlock *
7122 const LoongArchSubtarget &Subtarget) {
7123 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7124 "Unexpected instruction");
7125
7126 MachineFunction &MF = *BB->getParent();
7127 DebugLoc DL = MI.getDebugLoc();
7129 Register LoReg = MI.getOperand(0).getReg();
7130 Register HiReg = MI.getOperand(1).getReg();
7131 Register SrcReg = MI.getOperand(2).getReg();
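  // Split the f64 into a GPR pair: movfr2gr.s reads bits [31:0] of the source
  // FPR and movfrh2gr.s reads bits [63:32].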
7132
7133 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7134 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7135 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7136 MI.eraseFromParent(); // The pseudo instruction is gone now.
7137 return BB;
7138}
7139
7140static MachineBasicBlock *
7142 const LoongArchSubtarget &Subtarget) {
7143 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7144 "Unexpected instruction");
7145
7146 MachineFunction &MF = *BB->getParent();
7147 DebugLoc DL = MI.getDebugLoc();
7150 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7151 Register DstReg = MI.getOperand(0).getReg();
7152 Register LoReg = MI.getOperand(1).getReg();
7153 Register HiReg = MI.getOperand(2).getReg();
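  // Rebuild the f64 from a GPR pair: movgr2fr.w writes bits [31:0] into a
  // temporary FPR and movgr2frh.w then writes bits [63:32] on top of it.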
7154
7155 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7156 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7157 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7158 .addReg(TmpReg, RegState::Kill)
7159 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7160 MI.eraseFromParent(); // The pseudo instruction is gone now.
7161 return BB;
7162}
7163
7165 switch (MI.getOpcode()) {
7166 default:
7167 return false;
7168 case LoongArch::Select_GPR_Using_CC_GPR:
7169 return true;
7170 }
7171}
7172
7173static MachineBasicBlock *
7175 const LoongArchSubtarget &Subtarget) {
7176 // To "insert" Select_* instructions, we actually have to insert the triangle
7177 // control-flow pattern. The incoming instructions know the destination vreg
7178 // to set, the condition code register to branch on, the true/false values to
7179 // select between, and the condcode to use to select the appropriate branch.
7180 //
7181 // We produce the following control flow:
7182 // HeadMBB
7183 // | \
7184 // | IfFalseMBB
7185 // | /
7186 // TailMBB
7187 //
7188 // When we find a sequence of selects we attempt to optimize their emission
7189 // by sharing the control flow. Currently we only handle cases where we have
7190 // multiple selects with the exact same condition (same LHS, RHS and CC).
7191 // The selects may be interleaved with other instructions if the other
7192 // instructions meet some requirements we deem safe:
7193 // - They are not pseudo instructions.
7194 // - They are debug instructions, or otherwise:
7195 // - They do not have side-effects, do not access memory, and their inputs do
7196 // not depend on the results of the select pseudo-instructions.
7197 // The TrueV/FalseV operands of the selects cannot depend on the result of
7198 // previous selects in the sequence.
7199 // These conditions could be further relaxed. See the X86 target for a
7200 // related approach and more information.
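  // For a single select the emitted code is, schematically:
  //   HeadMBB:    b<cc> lhs, rhs, TailMBB    ; CC holds the branch opcode
  //   IfFalseMBB: (fallthrough)
  //   TailMBB:    dst = PHI [TrueV, HeadMBB], [FalseV, IfFalseMBB]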
7201
7202 Register LHS = MI.getOperand(1).getReg();
7203 Register RHS;
7204 if (MI.getOperand(2).isReg())
7205 RHS = MI.getOperand(2).getReg();
7206 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7207
7208 SmallVector<MachineInstr *, 4> SelectDebugValues;
7209 SmallSet<Register, 4> SelectDests;
7210 SelectDests.insert(MI.getOperand(0).getReg());
7211
7212 MachineInstr *LastSelectPseudo = &MI;
7213 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7214 SequenceMBBI != E; ++SequenceMBBI) {
7215 if (SequenceMBBI->isDebugInstr())
7216 continue;
7217 if (isSelectPseudo(*SequenceMBBI)) {
7218 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7219 !SequenceMBBI->getOperand(2).isReg() ||
7220 SequenceMBBI->getOperand(2).getReg() != RHS ||
7221 SequenceMBBI->getOperand(3).getImm() != CC ||
7222 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7223 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7224 break;
7225 LastSelectPseudo = &*SequenceMBBI;
7226 SequenceMBBI->collectDebugValues(SelectDebugValues);
7227 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7228 continue;
7229 }
7230 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7231 SequenceMBBI->mayLoadOrStore() ||
7232 SequenceMBBI->usesCustomInsertionHook())
7233 break;
7234 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7235 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7236 }))
7237 break;
7238 }
7239
7240 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7241 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7242 DebugLoc DL = MI.getDebugLoc();
7244
7245 MachineBasicBlock *HeadMBB = BB;
7246 MachineFunction *F = BB->getParent();
7247 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7248 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7249
7250 F->insert(I, IfFalseMBB);
7251 F->insert(I, TailMBB);
7252
7253 // Set the call frame size on entry to the new basic blocks.
7254 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7255 IfFalseMBB->setCallFrameSize(CallFrameSize);
7256 TailMBB->setCallFrameSize(CallFrameSize);
7257
7258 // Transfer debug instructions associated with the selects to TailMBB.
7259 for (MachineInstr *DebugInstr : SelectDebugValues) {
7260 TailMBB->push_back(DebugInstr->removeFromParent());
7261 }
7262
7263 // Move all instructions after the sequence to TailMBB.
7264 TailMBB->splice(TailMBB->end(), HeadMBB,
7265 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7266 // Update machine-CFG edges by transferring all successors of the current
7267 // block to the new block which will contain the Phi nodes for the selects.
7268 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7269 // Set the successors for HeadMBB.
7270 HeadMBB->addSuccessor(IfFalseMBB);
7271 HeadMBB->addSuccessor(TailMBB);
7272
7273 // Insert appropriate branch.
7274 if (MI.getOperand(2).isImm())
7275 BuildMI(HeadMBB, DL, TII.get(CC))
7276 .addReg(LHS)
7277 .addImm(MI.getOperand(2).getImm())
7278 .addMBB(TailMBB);
7279 else
7280 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7281
7282 // IfFalseMBB just falls through to TailMBB.
7283 IfFalseMBB->addSuccessor(TailMBB);
7284
7285 // Create PHIs for all of the select pseudo-instructions.
7286 auto SelectMBBI = MI.getIterator();
7287 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7288 auto InsertionPoint = TailMBB->begin();
7289 while (SelectMBBI != SelectEnd) {
7290 auto Next = std::next(SelectMBBI);
7291 if (isSelectPseudo(*SelectMBBI)) {
7292 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7293 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7294 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7295 .addReg(SelectMBBI->getOperand(4).getReg())
7296 .addMBB(HeadMBB)
7297 .addReg(SelectMBBI->getOperand(5).getReg())
7298 .addMBB(IfFalseMBB);
7299 SelectMBBI->eraseFromParent();
7300 }
7301 SelectMBBI = Next;
7302 }
7303
7304 F->getProperties().resetNoPHIs();
7305 return TailMBB;
7306}
7307
7308MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7309 MachineInstr &MI, MachineBasicBlock *BB) const {
7310 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7311 DebugLoc DL = MI.getDebugLoc();
7312
7313 switch (MI.getOpcode()) {
7314 default:
7315 llvm_unreachable("Unexpected instr type to insert");
7316 case LoongArch::DIV_W:
7317 case LoongArch::DIV_WU:
7318 case LoongArch::MOD_W:
7319 case LoongArch::MOD_WU:
7320 case LoongArch::DIV_D:
7321 case LoongArch::DIV_DU:
7322 case LoongArch::MOD_D:
7323 case LoongArch::MOD_DU:
7324 return insertDivByZeroTrap(MI, BB);
7325 break;
7326 case LoongArch::WRFCSR: {
7327 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7328 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7329 .addReg(MI.getOperand(1).getReg());
7330 MI.eraseFromParent();
7331 return BB;
7332 }
7333 case LoongArch::RDFCSR: {
7334 MachineInstr *ReadFCSR =
7335 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7336 MI.getOperand(0).getReg())
7337 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7338 ReadFCSR->getOperand(1).setIsUndef();
7339 MI.eraseFromParent();
7340 return BB;
7341 }
7342 case LoongArch::Select_GPR_Using_CC_GPR:
7343 return emitSelectPseudo(MI, BB, Subtarget);
7344 case LoongArch::BuildPairF64Pseudo:
7345 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7346 case LoongArch::SplitPairF64Pseudo:
7347 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7348 case LoongArch::PseudoVBZ:
7349 case LoongArch::PseudoVBZ_B:
7350 case LoongArch::PseudoVBZ_H:
7351 case LoongArch::PseudoVBZ_W:
7352 case LoongArch::PseudoVBZ_D:
7353 case LoongArch::PseudoVBNZ:
7354 case LoongArch::PseudoVBNZ_B:
7355 case LoongArch::PseudoVBNZ_H:
7356 case LoongArch::PseudoVBNZ_W:
7357 case LoongArch::PseudoVBNZ_D:
7358 case LoongArch::PseudoXVBZ:
7359 case LoongArch::PseudoXVBZ_B:
7360 case LoongArch::PseudoXVBZ_H:
7361 case LoongArch::PseudoXVBZ_W:
7362 case LoongArch::PseudoXVBZ_D:
7363 case LoongArch::PseudoXVBNZ:
7364 case LoongArch::PseudoXVBNZ_B:
7365 case LoongArch::PseudoXVBNZ_H:
7366 case LoongArch::PseudoXVBNZ_W:
7367 case LoongArch::PseudoXVBNZ_D:
7368 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7369 case LoongArch::PseudoXVINSGR2VR_B:
7370 case LoongArch::PseudoXVINSGR2VR_H:
7371 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7372 case LoongArch::PseudoCTPOP:
7373 return emitPseudoCTPOP(MI, BB, Subtarget);
7374 case LoongArch::PseudoVMSKLTZ_B:
7375 case LoongArch::PseudoVMSKLTZ_H:
7376 case LoongArch::PseudoVMSKLTZ_W:
7377 case LoongArch::PseudoVMSKLTZ_D:
7378 case LoongArch::PseudoVMSKGEZ_B:
7379 case LoongArch::PseudoVMSKEQZ_B:
7380 case LoongArch::PseudoVMSKNEZ_B:
7381 case LoongArch::PseudoXVMSKLTZ_B:
7382 case LoongArch::PseudoXVMSKLTZ_H:
7383 case LoongArch::PseudoXVMSKLTZ_W:
7384 case LoongArch::PseudoXVMSKLTZ_D:
7385 case LoongArch::PseudoXVMSKGEZ_B:
7386 case LoongArch::PseudoXVMSKEQZ_B:
7387 case LoongArch::PseudoXVMSKNEZ_B:
7388 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7389 case TargetOpcode::STATEPOINT:
7390 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
7391 // while the BL call instruction (to which the statepoint is eventually
7392 // lowered) has an implicit def. This def is early-clobber as it is set at
7393 // the moment of the call, before any use is read.
7394 // Add this implicit dead def here as a workaround.
7395 MI.addOperand(*MI.getMF(),
7397 LoongArch::R1, /*isDef*/ true,
7398 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7399 /*isUndef*/ false, /*isEarlyClobber*/ true));
7400 if (!Subtarget.is64Bit())
7401 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7402 return emitPatchPoint(MI, BB);
7403 }
7404}
7405
7407 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7408 unsigned *Fast) const {
7409 if (!Subtarget.hasUAL())
7410 return false;
7411
7412 // TODO: set reasonable speed number.
7413 if (Fast)
7414 *Fast = 1;
7415 return true;
7416}
7417
7418const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7419 switch ((LoongArchISD::NodeType)Opcode) {
7421 break;
7422
7423#define NODE_NAME_CASE(node) \
7424 case LoongArchISD::node: \
7425 return "LoongArchISD::" #node;
7426
7427 // TODO: Add more target-dependent nodes later.
7428 NODE_NAME_CASE(CALL)
7429 NODE_NAME_CASE(CALL_MEDIUM)
7430 NODE_NAME_CASE(CALL_LARGE)
7431 NODE_NAME_CASE(RET)
7432 NODE_NAME_CASE(TAIL)
7433 NODE_NAME_CASE(TAIL_MEDIUM)
7434 NODE_NAME_CASE(TAIL_LARGE)
7435 NODE_NAME_CASE(SELECT_CC)
7436 NODE_NAME_CASE(BR_CC)
7437 NODE_NAME_CASE(BRCOND)
7438 NODE_NAME_CASE(SLL_W)
7439 NODE_NAME_CASE(SRA_W)
7440 NODE_NAME_CASE(SRL_W)
7441 NODE_NAME_CASE(BSTRINS)
7442 NODE_NAME_CASE(BSTRPICK)
7443 NODE_NAME_CASE(MOVGR2FR_W)
7444 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7445 NODE_NAME_CASE(MOVGR2FR_D)
7446 NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
7447 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7448 NODE_NAME_CASE(FTINT)
7449 NODE_NAME_CASE(BUILD_PAIR_F64)
7450 NODE_NAME_CASE(SPLIT_PAIR_F64)
7451 NODE_NAME_CASE(REVB_2H)
7452 NODE_NAME_CASE(REVB_2W)
7453 NODE_NAME_CASE(BITREV_4B)
7454 NODE_NAME_CASE(BITREV_8B)
7455 NODE_NAME_CASE(BITREV_W)
7456 NODE_NAME_CASE(ROTR_W)
7457 NODE_NAME_CASE(ROTL_W)
7458 NODE_NAME_CASE(DIV_W)
7459 NODE_NAME_CASE(DIV_WU)
7460 NODE_NAME_CASE(MOD_W)
7461 NODE_NAME_CASE(MOD_WU)
7462 NODE_NAME_CASE(CLZ_W)
7463 NODE_NAME_CASE(CTZ_W)
7464 NODE_NAME_CASE(DBAR)
7465 NODE_NAME_CASE(IBAR)
7466 NODE_NAME_CASE(BREAK)
7467 NODE_NAME_CASE(SYSCALL)
7468 NODE_NAME_CASE(CRC_W_B_W)
7469 NODE_NAME_CASE(CRC_W_H_W)
7470 NODE_NAME_CASE(CRC_W_W_W)
7471 NODE_NAME_CASE(CRC_W_D_W)
7472 NODE_NAME_CASE(CRCC_W_B_W)
7473 NODE_NAME_CASE(CRCC_W_H_W)
7474 NODE_NAME_CASE(CRCC_W_W_W)
7475 NODE_NAME_CASE(CRCC_W_D_W)
7476 NODE_NAME_CASE(CSRRD)
7477 NODE_NAME_CASE(CSRWR)
7478 NODE_NAME_CASE(CSRXCHG)
7479 NODE_NAME_CASE(IOCSRRD_B)
7480 NODE_NAME_CASE(IOCSRRD_H)
7481 NODE_NAME_CASE(IOCSRRD_W)
7482 NODE_NAME_CASE(IOCSRRD_D)
7483 NODE_NAME_CASE(IOCSRWR_B)
7484 NODE_NAME_CASE(IOCSRWR_H)
7485 NODE_NAME_CASE(IOCSRWR_W)
7486 NODE_NAME_CASE(IOCSRWR_D)
7487 NODE_NAME_CASE(CPUCFG)
7488 NODE_NAME_CASE(MOVGR2FCSR)
7489 NODE_NAME_CASE(MOVFCSR2GR)
7490 NODE_NAME_CASE(CACOP_D)
7491 NODE_NAME_CASE(CACOP_W)
7492 NODE_NAME_CASE(VSHUF)
7493 NODE_NAME_CASE(VPICKEV)
7494 NODE_NAME_CASE(VPICKOD)
7495 NODE_NAME_CASE(VPACKEV)
7496 NODE_NAME_CASE(VPACKOD)
7497 NODE_NAME_CASE(VILVL)
7498 NODE_NAME_CASE(VILVH)
7499 NODE_NAME_CASE(VSHUF4I)
7500 NODE_NAME_CASE(VREPLVEI)
7501 NODE_NAME_CASE(VREPLGR2VR)
7502 NODE_NAME_CASE(XVPERMI)
7503 NODE_NAME_CASE(XVPERM)
7504 NODE_NAME_CASE(XVREPLVE0)
7505 NODE_NAME_CASE(XVREPLVE0Q)
7506 NODE_NAME_CASE(XVINSVE0)
7507 NODE_NAME_CASE(VPICK_SEXT_ELT)
7508 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7509 NODE_NAME_CASE(VREPLVE)
7510 NODE_NAME_CASE(VALL_ZERO)
7511 NODE_NAME_CASE(VANY_ZERO)
7512 NODE_NAME_CASE(VALL_NONZERO)
7513 NODE_NAME_CASE(VANY_NONZERO)
7514 NODE_NAME_CASE(FRECIPE)
7515 NODE_NAME_CASE(FRSQRTE)
7516 NODE_NAME_CASE(VSLLI)
7517 NODE_NAME_CASE(VSRLI)
7518 NODE_NAME_CASE(VBSLL)
7519 NODE_NAME_CASE(VBSRL)
7520 NODE_NAME_CASE(VLDREPL)
7521 NODE_NAME_CASE(VMSKLTZ)
7522 NODE_NAME_CASE(VMSKGEZ)
7523 NODE_NAME_CASE(VMSKEQZ)
7524 NODE_NAME_CASE(VMSKNEZ)
7525 NODE_NAME_CASE(XVMSKLTZ)
7526 NODE_NAME_CASE(XVMSKGEZ)
7527 NODE_NAME_CASE(XVMSKEQZ)
7528 NODE_NAME_CASE(XVMSKNEZ)
7529 NODE_NAME_CASE(VHADDW)
7530 }
7531#undef NODE_NAME_CASE
7532 return nullptr;
7533}
7534
7535//===----------------------------------------------------------------------===//
7536// Calling Convention Implementation
7537//===----------------------------------------------------------------------===//
7538
7539 // Eight general-purpose registers a0-a7 are used for passing integer
7540 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
7541 // to pass fixed-point arguments, and floating-point arguments when no FPR is
7542 // available or with a soft-float ABI.
7543const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7544 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7545 LoongArch::R10, LoongArch::R11};
7546 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7547 // arguments, and fa0-fa1 are also used to return values.
7548const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7549 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7550 LoongArch::F6, LoongArch::F7};
7551// FPR32 and FPR64 alias each other.
7553 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7554 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7555
7556const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7557 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7558 LoongArch::VR6, LoongArch::VR7};
7559
7560const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7561 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7562 LoongArch::XR6, LoongArch::XR7};
7563
7564// Pass a 2*GRLen argument that has been split into two GRLen values through
7565// registers or the stack as necessary.
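// For example, on LA32 an i64 argument may land with both halves in GPRs
// (say $a0/$a1), with the low half in the last free GPR and the high half on
// the stack, or entirely on the stack once no argument GPRs remain.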
7566static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7567 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7568 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7569 ISD::ArgFlagsTy ArgFlags2) {
7570 unsigned GRLenInBytes = GRLen / 8;
7571 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7572 // At least one half can be passed via register.
7573 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7574 VA1.getLocVT(), CCValAssign::Full));
7575 } else {
7576 // Both halves must be passed on the stack, with proper alignment.
7577 Align StackAlign =
7578 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7579 State.addLoc(
7581 State.AllocateStack(GRLenInBytes, StackAlign),
7582 VA1.getLocVT(), CCValAssign::Full));
7583 State.addLoc(CCValAssign::getMem(
7584 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7585 LocVT2, CCValAssign::Full));
7586 return false;
7587 }
7588 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7589 // The second half can also be passed via register.
7590 State.addLoc(
7591 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7592 } else {
7593 // The second half is passed via the stack, without additional alignment.
7594 State.addLoc(CCValAssign::getMem(
7595 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7596 LocVT2, CCValAssign::Full));
7597 }
7598 return false;
7599}
7600
7601// Implements the LoongArch calling convention. Returns true upon failure.
7603 unsigned ValNo, MVT ValVT,
7604 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7605 CCState &State, bool IsRet, Type *OrigTy) {
7606 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7607 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7608 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7609 MVT LocVT = ValVT;
7610
7611 // Any return value split into more than two values can't be returned
7612 // directly.
7613 if (IsRet && ValNo > 1)
7614 return true;
7615
7616 // Floating-point values go in GPRs when using a soft-float ABI, when passing a variadic argument, or when no FPR is available.
7617 bool UseGPRForFloat = true;
7618
7619 switch (ABI) {
7620 default:
7621 llvm_unreachable("Unexpected ABI");
7622 break;
7627 UseGPRForFloat = ArgFlags.isVarArg();
7628 break;
7631 break;
7632 }
7633
7634 // If this is a variadic argument, the LoongArch calling convention requires
7635 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7636 // byte alignment. An aligned register should be used regardless of whether
7637 // the original argument was split during legalisation or not. The argument
7638 // will not be passed by registers if the original type is larger than
7639 // 2*GRLen, so the register alignment rule does not apply.
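  // For example, on LA32 a variadic double whose next free register would be
  // $a5 is instead given the aligned pair $a6/$a7, leaving $a5 unused.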
7640 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7641 if (ArgFlags.isVarArg() &&
7642 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7643 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7644 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7645 // Skip 'odd' register if necessary.
7646 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7647 State.AllocateReg(ArgGPRs);
7648 }
7649
7650 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7651 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7652 State.getPendingArgFlags();
7653
7654 assert(PendingLocs.size() == PendingArgFlags.size() &&
7655 "PendingLocs and PendingArgFlags out of sync");
7656
7657 // FPR32 and FPR64 alias each other.
7658 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7659 UseGPRForFloat = true;
7660
7661 if (UseGPRForFloat && ValVT == MVT::f32) {
7662 LocVT = GRLenVT;
7663 LocInfo = CCValAssign::BCvt;
7664 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7665 LocVT = MVT::i64;
7666 LocInfo = CCValAssign::BCvt;
7667 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7668 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7669 // registers are exhausted.
7670 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7671 // Depending on available argument GPRS, f64 may be passed in a pair of
7672 // GPRs, split between a GPR and the stack, or passed completely on the
7673 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7674 // cases.
7675 MCRegister Reg = State.AllocateReg(ArgGPRs);
7676 if (!Reg) {
7677 int64_t StackOffset = State.AllocateStack(8, Align(8));
7678 State.addLoc(
7679 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7680 return false;
7681 }
7682 LocVT = MVT::i32;
7683 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7684 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7685 if (HiReg) {
7686 State.addLoc(
7687 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7688 } else {
7689 int64_t StackOffset = State.AllocateStack(4, Align(4));
7690 State.addLoc(
7691 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7692 }
7693 return false;
7694 }
7695
7696 // Split arguments might be passed indirectly, so keep track of the pending
7697 // values.
7698 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7699 LocVT = GRLenVT;
7700 LocInfo = CCValAssign::Indirect;
7701 PendingLocs.push_back(
7702 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7703 PendingArgFlags.push_back(ArgFlags);
7704 if (!ArgFlags.isSplitEnd()) {
7705 return false;
7706 }
7707 }
7708
7709 // If the split argument only had two elements, it should be passed directly
7710 // in registers or on the stack.
7711 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7712 PendingLocs.size() <= 2) {
7713 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7714 // Apply the normal calling convention rules to the first half of the
7715 // split argument.
7716 CCValAssign VA = PendingLocs[0];
7717 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7718 PendingLocs.clear();
7719 PendingArgFlags.clear();
7720 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7721 ArgFlags);
7722 }
7723
7724 // Allocate to a register if possible, or else a stack slot.
7725 Register Reg;
7726 unsigned StoreSizeBytes = GRLen / 8;
7727 Align StackAlign = Align(GRLen / 8);
7728
7729 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7730 Reg = State.AllocateReg(ArgFPR32s);
7731 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7732 Reg = State.AllocateReg(ArgFPR64s);
7733 } else if (ValVT.is128BitVector()) {
7734 Reg = State.AllocateReg(ArgVRs);
7735 UseGPRForFloat = false;
7736 StoreSizeBytes = 16;
7737 StackAlign = Align(16);
7738 } else if (ValVT.is256BitVector()) {
7739 Reg = State.AllocateReg(ArgXRs);
7740 UseGPRForFloat = false;
7741 StoreSizeBytes = 32;
7742 StackAlign = Align(32);
7743 } else {
7744 Reg = State.AllocateReg(ArgGPRs);
7745 }
7746
7747 unsigned StackOffset =
7748 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7749
7750 // If we reach this point and PendingLocs is non-empty, we must be at the
7751 // end of a split argument that must be passed indirectly.
7752 if (!PendingLocs.empty()) {
7753 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7754 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7755 for (auto &It : PendingLocs) {
7756 if (Reg)
7757 It.convertToReg(Reg);
7758 else
7759 It.convertToMem(StackOffset);
7760 State.addLoc(It);
7761 }
7762 PendingLocs.clear();
7763 PendingArgFlags.clear();
7764 return false;
7765 }
7766 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7767 "Expected a GRLenVT at this stage");
7768
7769 if (Reg) {
7770 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7771 return false;
7772 }
7773
7774 // When a floating-point value is passed on the stack, no bit-cast is needed.
7775 if (ValVT.isFloatingPoint()) {
7776 LocVT = ValVT;
7777 LocInfo = CCValAssign::Full;
7778 }
7779
7780 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7781 return false;
7782}
7783
7784void LoongArchTargetLowering::analyzeInputArgs(
7785 MachineFunction &MF, CCState &CCInfo,
7786 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7787 LoongArchCCAssignFn Fn) const {
7788 FunctionType *FType = MF.getFunction().getFunctionType();
7789 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7790 MVT ArgVT = Ins[i].VT;
7791 Type *ArgTy = nullptr;
7792 if (IsRet)
7793 ArgTy = FType->getReturnType();
7794 else if (Ins[i].isOrigArg())
7795 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7797 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7798 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7799 CCInfo, IsRet, ArgTy)) {
7800 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7801 << '\n');
7802 llvm_unreachable("");
7803 }
7804 }
7805}
7806
7807void LoongArchTargetLowering::analyzeOutputArgs(
7808 MachineFunction &MF, CCState &CCInfo,
7809 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7810 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7811 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7812 MVT ArgVT = Outs[i].VT;
7813 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7815 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7816 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7817 CCInfo, IsRet, OrigTy)) {
7818 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7819 << "\n");
7820 llvm_unreachable("");
7821 }
7822 }
7823}
7824
7825// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7826// values.
7828 const CCValAssign &VA, const SDLoc &DL) {
7829 switch (VA.getLocInfo()) {
7830 default:
7831 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7832 case CCValAssign::Full:
7834 break;
7835 case CCValAssign::BCvt:
7836 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7837 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7838 else
7839 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7840 break;
7841 }
7842 return Val;
7843}
7844
7846 const CCValAssign &VA, const SDLoc &DL,
7847 const ISD::InputArg &In,
7848 const LoongArchTargetLowering &TLI) {
7851 EVT LocVT = VA.getLocVT();
7852 SDValue Val;
7853 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7854 Register VReg = RegInfo.createVirtualRegister(RC);
7855 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7856 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7857
7858 // If the input is sign-extended from 32 bits, note it for the OptW pass.
7859 if (In.isOrigArg()) {
7860 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7861 if (OrigArg->getType()->isIntegerTy()) {
7862 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7863 // An input zero extended from i31 can also be considered sign extended.
7864 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7865 (BitWidth < 32 && In.Flags.isZExt())) {
7868 LAFI->addSExt32Register(VReg);
7869 }
7870 }
7871 }
7872
7873 return convertLocVTToValVT(DAG, Val, VA, DL);
7874}
7875
7876// The caller is responsible for loading the full value if the argument is
7877// passed with CCValAssign::Indirect.
7879 const CCValAssign &VA, const SDLoc &DL) {
7881 MachineFrameInfo &MFI = MF.getFrameInfo();
7882 EVT ValVT = VA.getValVT();
7883 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7884 /*IsImmutable=*/true);
7885 SDValue FIN = DAG.getFrameIndex(
7887
7888 ISD::LoadExtType ExtType;
7889 switch (VA.getLocInfo()) {
7890 default:
7891 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7892 case CCValAssign::Full:
7894 case CCValAssign::BCvt:
7895 ExtType = ISD::NON_EXTLOAD;
7896 break;
7897 }
7898 return DAG.getExtLoad(
7899 ExtType, DL, VA.getLocVT(), Chain, FIN,
7901}
7902
7904 const CCValAssign &VA,
7905 const CCValAssign &HiVA,
7906 const SDLoc &DL) {
7907 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7908 "Unexpected VA");
7910 MachineFrameInfo &MFI = MF.getFrameInfo();
7912
7913 assert(VA.isRegLoc() && "Expected register VA assignment");
7914
7915 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7916 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7917 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7918 SDValue Hi;
7919 if (HiVA.isMemLoc()) {
7920 // Second half of f64 is passed on the stack.
7921 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7922 /*IsImmutable=*/true);
7923 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7924 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7926 } else {
7927 // Second half of f64 is passed in another GPR.
7928 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7929 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7930 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7931 }
7932 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7933}
7934
7936 const CCValAssign &VA, const SDLoc &DL) {
7937 EVT LocVT = VA.getLocVT();
7938
7939 switch (VA.getLocInfo()) {
7940 default:
7941 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7942 case CCValAssign::Full:
7943 break;
7944 case CCValAssign::BCvt:
7945 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7946 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7947 else
7948 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7949 break;
7950 }
7951 return Val;
7952}
7953
7954static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7955 CCValAssign::LocInfo LocInfo,
7956 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7957 CCState &State) {
7958 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7959 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7960 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7961 static const MCPhysReg GPRList[] = {
7962 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7963 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7964 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7965 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7966 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7967 return false;
7968 }
7969 }
7970
7971 if (LocVT == MVT::f32) {
7972 // Pass in STG registers: F1, F2, F3, F4
7973 // fs0,fs1,fs2,fs3
7974 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7975 LoongArch::F26, LoongArch::F27};
7976 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7977 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7978 return false;
7979 }
7980 }
7981
7982 if (LocVT == MVT::f64) {
7983 // Pass in STG registers: D1, D2, D3, D4
7984 // fs4,fs5,fs6,fs7
7985 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7986 LoongArch::F30_64, LoongArch::F31_64};
7987 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7988 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7989 return false;
7990 }
7991 }
7992
7993 report_fatal_error("No registers left in GHC calling convention");
7994 return true;
7995}
7996
7997// Transform physical registers into virtual registers.
7999 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8000 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8001 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8002
8004
8005 switch (CallConv) {
8006 default:
8007 llvm_unreachable("Unsupported calling convention");
8008 case CallingConv::C:
8009 case CallingConv::Fast:
8011 break;
8012 case CallingConv::GHC:
8013 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8014 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8016 "GHC calling convention requires the F and D extensions");
8017 }
8018
8019 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8020 MVT GRLenVT = Subtarget.getGRLenVT();
8021 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
8022 // Used with varargs to accumulate store chains.
8023 std::vector<SDValue> OutChains;
8024
8025 // Assign locations to all of the incoming arguments.
8027 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8028
8029 if (CallConv == CallingConv::GHC)
8031 else
8032 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8033
8034 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8035 CCValAssign &VA = ArgLocs[i];
8036 SDValue ArgValue;
8037 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8038 // case.
8039 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8040 assert(VA.needsCustom());
8041 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8042 } else if (VA.isRegLoc())
8043 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8044 else
8045 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8046 if (VA.getLocInfo() == CCValAssign::Indirect) {
8047 // If the original argument was split and passed by reference, we need to
8048 // load all parts of it here (using the same address).
8049 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8051 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8052 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8053 assert(ArgPartOffset == 0);
8054 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8055 CCValAssign &PartVA = ArgLocs[i + 1];
8056 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8057 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8058 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8059 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8061 ++i;
8062 ++InsIdx;
8063 }
8064 continue;
8065 }
8066 InVals.push_back(ArgValue);
8067 }
8068
8069 if (IsVarArg) {
8071 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8072 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8073 MachineFrameInfo &MFI = MF.getFrameInfo();
8074 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8075 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8076
8077 // Offset of the first variable argument from stack pointer, and size of
8078 // the vararg save area. For now, the varargs save area is either zero or
8079 // large enough to hold a0-a7.
8080 int VaArgOffset, VarArgsSaveSize;
8081
8082 // If all registers are allocated, then all varargs must be passed on the
8083 // stack and we don't need to save any argregs.
8084 if (ArgRegs.size() == Idx) {
8085 VaArgOffset = CCInfo.getStackSize();
8086 VarArgsSaveSize = 0;
8087 } else {
8088 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8089 VaArgOffset = -VarArgsSaveSize;
8090 }
8091
8092 // Record the frame index of the first variable argument,
8093 // which is needed when lowering VASTART.
8094 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8095 LoongArchFI->setVarArgsFrameIndex(FI);
8096
8097 // If saving an odd number of registers, create an extra stack slot to
8098 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8099 // offsets to even-numbered registers remain 2*GRLen-aligned.
8100 if (Idx % 2) {
8101 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8102 true);
8103 VarArgsSaveSize += GRLenInBytes;
8104 }
8105
8106 // Copy the integer registers that may have been used for passing varargs
8107 // to the vararg save area.
8108 for (unsigned I = Idx; I < ArgRegs.size();
8109 ++I, VaArgOffset += GRLenInBytes) {
8110 const Register Reg = RegInfo.createVirtualRegister(RC);
8111 RegInfo.addLiveIn(ArgRegs[I], Reg);
8112 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8113 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8114 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8115 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8117 cast<StoreSDNode>(Store.getNode())
8118 ->getMemOperand()
8119 ->setValue((Value *)nullptr);
8120 OutChains.push_back(Store);
8121 }
8122 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8123 }
8124
8125 // All stores are grouped in one node to allow the matching between
8126 // the size of Ins and InVals. This only happens for vararg functions.
8127 if (!OutChains.empty()) {
8128 OutChains.push_back(Chain);
8129 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8130 }
8131
8132 return Chain;
8133}
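// Illustrative sketch (not part of the upstream file): the vararg save-area
// arithmetic above for a hypothetical LA64 function whose named arguments
// consume a0-a2, so Idx == 3 and GRLenInBytes == 8. The odd Idx triggers the
// extra padding slot that keeps the area 2*GRLen-aligned.
static_assert(8 * (8 - 3) == 40, "a3-a7 need a 40-byte save area on LA64");
static_assert((40 + 8) % 16 == 0, "one extra slot restores 2*GRLen alignment");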
8134
8136 return CI->isTailCall();
8137}
8138
8139// Check that the return value is used only as a return value, as otherwise
8140// we can't perform a tail call.
8142 SDValue &Chain) const {
8143 if (N->getNumValues() != 1)
8144 return false;
8145 if (!N->hasNUsesOfValue(1, 0))
8146 return false;
8147
8148 SDNode *Copy = *N->user_begin();
8149 if (Copy->getOpcode() != ISD::CopyToReg)
8150 return false;
8151
8152 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8153 // isn't safe to perform a tail call.
8154 if (Copy->getGluedNode())
8155 return false;
8156
8157 // The copy must be used by a LoongArchISD::RET, and nothing else.
8158 bool HasRet = false;
8159 for (SDNode *Node : Copy->users()) {
8160 if (Node->getOpcode() != LoongArchISD::RET)
8161 return false;
8162 HasRet = true;
8163 }
8164
8165 if (!HasRet)
8166 return false;
8167
8168 Chain = Copy->getOperand(0);
8169 return true;
8170}
8171
8172// Check whether the call is eligible for tail call optimization.
8173bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8174 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8175 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8176
8177 auto CalleeCC = CLI.CallConv;
8178 auto &Outs = CLI.Outs;
8179 auto &Caller = MF.getFunction();
8180 auto CallerCC = Caller.getCallingConv();
8181
8182 // Do not tail call opt if the stack is used to pass parameters.
8183 if (CCInfo.getStackSize() != 0)
8184 return false;
8185
8186 // Do not tail call opt if any parameters need to be passed indirectly.
8187 for (auto &VA : ArgLocs)
8188 if (VA.getLocInfo() == CCValAssign::Indirect)
8189 return false;
8190
8191 // Do not tail call opt if either caller or callee uses struct return
8192 // semantics.
8193 auto IsCallerStructRet = Caller.hasStructRetAttr();
8194 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8195 if (IsCallerStructRet || IsCalleeStructRet)
8196 return false;
8197
8198 // Do not tail call opt if either the callee or caller has a byval argument.
8199 for (auto &Arg : Outs)
8200 if (Arg.Flags.isByVal())
8201 return false;
8202
8203 // The callee has to preserve all registers the caller needs to preserve.
8204 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8205 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8206 if (CalleeCC != CallerCC) {
8207 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8208 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8209 return false;
8210 }
8211 return true;
8212}
8213
8215 return DAG.getDataLayout().getPrefTypeAlign(
8216 VT.getTypeForEVT(*DAG.getContext()));
8217}
8218
8219// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8220// and output parameter nodes.
8221SDValue
8223 SmallVectorImpl<SDValue> &InVals) const {
8224 SelectionDAG &DAG = CLI.DAG;
8225 SDLoc &DL = CLI.DL;
8227 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8229 SDValue Chain = CLI.Chain;
8230 SDValue Callee = CLI.Callee;
8231 CallingConv::ID CallConv = CLI.CallConv;
8232 bool IsVarArg = CLI.IsVarArg;
8233 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8234 MVT GRLenVT = Subtarget.getGRLenVT();
8235 bool &IsTailCall = CLI.IsTailCall;
8236
8238
8239 // Analyze the operands of the call, assigning locations to each operand.
8241 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8242
8243 if (CallConv == CallingConv::GHC)
8244 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8245 else
8246 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8247
8248 // Check if it's really possible to do a tail call.
8249 if (IsTailCall)
8250 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8251
8252 if (IsTailCall)
8253 ++NumTailCalls;
8254 else if (CLI.CB && CLI.CB->isMustTailCall())
8255 report_fatal_error("failed to perform tail call elimination on a call "
8256 "site marked musttail");
8257
8258 // Get a count of how many bytes are to be pushed on the stack.
8259 unsigned NumBytes = ArgCCInfo.getStackSize();
8260
8261 // Create local copies for byval args.
8262 SmallVector<SDValue> ByValArgs;
8263 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8264 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8265 if (!Flags.isByVal())
8266 continue;
8267
8268 SDValue Arg = OutVals[i];
8269 unsigned Size = Flags.getByValSize();
8270 Align Alignment = Flags.getNonZeroByValAlign();
8271
8272 int FI =
8273 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8274 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8275 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8276
8277 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8278 /*IsVolatile=*/false,
8279 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8281 ByValArgs.push_back(FIPtr);
8282 }
8283
8284 if (!IsTailCall)
8285 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8286
8287 // Copy argument values to their designated locations.
8289 SmallVector<SDValue> MemOpChains;
8290 SDValue StackPtr;
8291 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8292 ++i, ++OutIdx) {
8293 CCValAssign &VA = ArgLocs[i];
8294 SDValue ArgValue = OutVals[OutIdx];
8295 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8296
8297 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8298 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8299 assert(VA.isRegLoc() && "Expected register VA assignment");
8300 assert(VA.needsCustom());
8301 SDValue SplitF64 =
8303 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8304 SDValue Lo = SplitF64.getValue(0);
8305 SDValue Hi = SplitF64.getValue(1);
8306
8307 Register RegLo = VA.getLocReg();
8308 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8309
8310 // Get the CCValAssign for the Hi part.
8311 CCValAssign &HiVA = ArgLocs[++i];
8312
8313 if (HiVA.isMemLoc()) {
8314 // Second half of f64 is passed on the stack.
8315 if (!StackPtr.getNode())
8316 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8318 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8319 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8320 // Emit the store.
8321 MemOpChains.push_back(DAG.getStore(
8322 Chain, DL, Hi, Address,
8324 } else {
8325 // Second half of f64 is passed in another GPR.
8326 Register RegHigh = HiVA.getLocReg();
8327 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8328 }
8329 continue;
8330 }
8331
8332 // Promote the value if needed.
8333 // For now, only handle fully promoted and indirect arguments.
8334 if (VA.getLocInfo() == CCValAssign::Indirect) {
8335 // Store the argument in a stack slot and pass its address.
8336 Align StackAlign =
8337 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8338 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8339 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8340 // If the original argument was split and passed by reference, we need to
8341 // store the required parts of it here (and pass just one address).
8342 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8343 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8344 assert(ArgPartOffset == 0);
8345 // Calculate the total size to store. We don't have access to what we're
8346 // actually storing other than performing the loop and collecting the
8347 // info.
8349 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8350 SDValue PartValue = OutVals[OutIdx + 1];
8351 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8352 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8353 EVT PartVT = PartValue.getValueType();
8354
8355 StoredSize += PartVT.getStoreSize();
8356 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8357 Parts.push_back(std::make_pair(PartValue, Offset));
8358 ++i;
8359 ++OutIdx;
8360 }
8361 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8362 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8363 MemOpChains.push_back(
8364 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8366 for (const auto &Part : Parts) {
8367 SDValue PartValue = Part.first;
8368 SDValue PartOffset = Part.second;
8370 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8371 MemOpChains.push_back(
8372 DAG.getStore(Chain, DL, PartValue, Address,
8374 }
8375 ArgValue = SpillSlot;
8376 } else {
8377 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8378 }
8379
8380 // Use local copy if it is a byval arg.
8381 if (Flags.isByVal())
8382 ArgValue = ByValArgs[j++];
8383
8384 if (VA.isRegLoc()) {
8385 // Queue up the argument copies and emit them at the end.
8386 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8387 } else {
8388 assert(VA.isMemLoc() && "Argument not register or memory");
8389 assert(!IsTailCall && "Tail call not allowed if stack is used "
8390 "for passing parameters");
8391
8392 // Work out the address of the stack slot.
8393 if (!StackPtr.getNode())
8394 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8396 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8398
8399 // Emit the store.
8400 MemOpChains.push_back(
8401 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8402 }
8403 }
8404
8405 // Join the stores, which are independent of one another.
8406 if (!MemOpChains.empty())
8407 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8408
8409 SDValue Glue;
8410
8411 // Build a sequence of copy-to-reg nodes, chained and glued together.
8412 for (auto &Reg : RegsToPass) {
8413 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8414 Glue = Chain.getValue(1);
8415 }
8416
8417 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8418 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8419 // split it and then direct call can be matched by PseudoCALL.
8421 const GlobalValue *GV = S->getGlobal();
8422 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8425 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8426 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8427 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8430 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8431 }
8432
8433 // The first call operand is the chain and the second is the target address.
8435 Ops.push_back(Chain);
8436 Ops.push_back(Callee);
8437
8438 // Add argument registers to the end of the list so that they are
8439 // known live into the call.
8440 for (auto &Reg : RegsToPass)
8441 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8442
8443 if (!IsTailCall) {
8444 // Add a register mask operand representing the call-preserved registers.
8445 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8446 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8447 assert(Mask && "Missing call preserved mask for calling convention");
8448 Ops.push_back(DAG.getRegisterMask(Mask));
8449 }
8450
8451 // Glue the call to the argument copies, if any.
8452 if (Glue.getNode())
8453 Ops.push_back(Glue);
8454
8455 // Emit the call.
8456 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8457 unsigned Op;
8458 switch (DAG.getTarget().getCodeModel()) {
8459 default:
8460 report_fatal_error("Unsupported code model");
8461 case CodeModel::Small:
8462 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8463 break;
8464 case CodeModel::Medium:
8465 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8467 break;
8468 case CodeModel::Large:
8469 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8471 break;
8472 }
8473
8474 if (IsTailCall) {
8476 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8477 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8478 return Ret;
8479 }
8480
8481 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8482 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8483 Glue = Chain.getValue(1);
8484
8485 // Mark the end of the call, which is glued to the call itself.
8486 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8487 Glue = Chain.getValue(1);
8488
8489 // Assign locations to each value returned by this call.
8491 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8492 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8493
8494 // Copy all of the result registers out of their specified physreg.
8495 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8496 auto &VA = RVLocs[i];
8497 // Copy the value out.
8498 SDValue RetValue =
8499 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8500 // Glue the RetValue to the end of the call sequence.
8501 Chain = RetValue.getValue(1);
8502 Glue = RetValue.getValue(2);
8503
8504 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8505 assert(VA.needsCustom());
8506 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8507 MVT::i32, Glue);
8508 Chain = RetValue2.getValue(1);
8509 Glue = RetValue2.getValue(2);
8510 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8511 RetValue, RetValue2);
8512 } else
8513 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8514
8515 InVals.push_back(RetValue);
8516 }
8517
8518 return Chain;
8519}
8520
8522 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8523 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8524 const Type *RetTy) const {
8526 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8527
8528 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8529 LoongArchABI::ABI ABI =
8530 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8531 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8532 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8533 return false;
8534 }
8535 return true;
8536}
8537
8539 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8541 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8542 SelectionDAG &DAG) const {
8543 // Stores the assignment of the return value to a location.
8545
8546 // Info about the registers and stack slot.
8547 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8548 *DAG.getContext());
8549
8550 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8551 nullptr, CC_LoongArch);
8552 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8553 report_fatal_error("GHC functions return void only");
8554 SDValue Glue;
8555 SmallVector<SDValue, 4> RetOps(1, Chain);
8556
8557 // Copy the result values into the output registers.
8558 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8559 SDValue Val = OutVals[OutIdx];
8560 CCValAssign &VA = RVLocs[i];
8561 assert(VA.isRegLoc() && "Can only return in registers!");
8562
8563 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8564 // Handle returning f64 on LA32D with a soft float ABI.
8565 assert(VA.isRegLoc() && "Expected return via registers");
8566 assert(VA.needsCustom());
8568 DAG.getVTList(MVT::i32, MVT::i32), Val);
8569 SDValue Lo = SplitF64.getValue(0);
8570 SDValue Hi = SplitF64.getValue(1);
8571 Register RegLo = VA.getLocReg();
8572 Register RegHi = RVLocs[++i].getLocReg();
8573
8574 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8575 Glue = Chain.getValue(1);
8576 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8577 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8578 Glue = Chain.getValue(1);
8579 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8580 } else {
8581 // Handle a 'normal' return.
8582 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8583 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8584
8585 // Guarantee that all emitted copies are stuck together.
8586 Glue = Chain.getValue(1);
8587 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8588 }
8589 }
8590
8591 RetOps[0] = Chain; // Update chain.
8592
8593 // Add the glue node if we have it.
8594 if (Glue.getNode())
8595 RetOps.push_back(Glue);
8596
8597 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8598}
8599
8600// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8601// Note: The following prefixes are excluded:
8602// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8603// as they can be represented using [x]vrepli.[whb]
8605 const APInt &SplatValue, const unsigned SplatBitSize) const {
8606 uint64_t RequiredImm = 0;
8607 uint64_t V = SplatValue.getZExtValue();
8608 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8609 // 4'b0101
8610 RequiredImm = (0b10101 << 8) | (V >> 8);
8611 return {true, RequiredImm};
8612 } else if (SplatBitSize == 32) {
8613 // 4'b0001
8614 if (!(V & 0xFFFF00FF)) {
8615 RequiredImm = (0b10001 << 8) | (V >> 8);
8616 return {true, RequiredImm};
8617 }
8618 // 4'b0010
8619 if (!(V & 0xFF00FFFF)) {
8620 RequiredImm = (0b10010 << 8) | (V >> 16);
8621 return {true, RequiredImm};
8622 }
8623 // 4'b0011
8624 if (!(V & 0x00FFFFFF)) {
8625 RequiredImm = (0b10011 << 8) | (V >> 24);
8626 return {true, RequiredImm};
8627 }
8628 // 4'b0110
8629 if ((V & 0xFFFF00FF) == 0xFF) {
8630 RequiredImm = (0b10110 << 8) | (V >> 8);
8631 return {true, RequiredImm};
8632 }
8633 // 4'b0111
8634 if ((V & 0xFF00FFFF) == 0xFFFF) {
8635 RequiredImm = (0b10111 << 8) | (V >> 16);
8636 return {true, RequiredImm};
8637 }
8638 // 4'b1010
8639 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8640 RequiredImm =
8641 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8642 return {true, RequiredImm};
8643 }
8644 } else if (SplatBitSize == 64) {
8645 // 4'b1011
8646 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8647 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8648 RequiredImm =
8649 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8650 return {true, RequiredImm};
8651 }
8652 // 4'b1100
8653 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8654 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8655 RequiredImm =
8656 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8657 return {true, RequiredImm};
8658 }
8659 // 4'b1001
8660 auto sameBitsPerByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8661 uint8_t res = 0;
8662 for (int i = 0; i < 8; ++i) {
8663 uint8_t byte = x & 0xFF;
8664 if (byte == 0 || byte == 0xFF)
8665 res |= ((byte & 1) << i);
8666 else
8667 return {false, 0};
8668 x >>= 8;
8669 }
8670 return {true, res};
8671 };
8672 auto [IsSame, Suffix] = sameBitsPerByte(V);
8673 if (IsSame) {
8674 RequiredImm = (0b11001 << 8) | Suffix;
8675 return {true, RequiredImm};
8676 }
8677 }
8678 return {false, RequiredImm};
8679}
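// Illustrative sketch (not part of the upstream file): the 4'b0001 case above
// applied to the hypothetical 32-bit splat value 0x00001200 (only byte 1 is
// non-zero), as a compile-time check of the immediate it would produce.
static_assert((0x00001200u & 0xFFFF00FFu) == 0, "only byte 1 of the splat is set");
static_assert(((0b10001u << 8) | (0x00001200u >> 8)) == 0x1112u,
              "vldi immediate 0x1112 reproduces the splat 0x00001200");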
8680
8682 EVT VT) const {
8683 if (!Subtarget.hasExtLSX())
8684 return false;
8685
8686 if (VT == MVT::f32) {
8687 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8688 return (masked == 0x3e000000 || masked == 0x40000000);
8689 }
8690
8691 if (VT == MVT::f64) {
8692 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8693 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8694 }
8695
8696 return false;
8697}
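// Illustrative sketch (not part of the upstream file): bit patterns showing
// why 1.0f passes the f32 mask check above while 0.1f does not.
static_assert((0x3F800000u & 0x7E07FFFFu) == 0x3E000000u,
              "1.0f (0x3F800000) is representable by vldi");
static_assert((0x3DCCCCCDu & 0x7E07FFFFu) != 0x3E000000u &&
              (0x3DCCCCCDu & 0x7E07FFFFu) != 0x40000000u,
              "0.1f (0x3DCCCCCD) is not");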
8698
8699bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8700 bool ForCodeSize) const {
8701 // TODO: Maybe need more checks here after vector extension is supported.
8702 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8703 return false;
8704 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8705 return false;
8706 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8707}
8708
8710 return true;
8711}
8712
8714 return true;
8715}
8716
8717bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8718 const Instruction *I) const {
8719 if (!Subtarget.is64Bit())
8720 return isa<LoadInst>(I) || isa<StoreInst>(I);
8721
8722 if (isa<LoadInst>(I))
8723 return true;
8724
8725 // On LA64, atomic store operations with an integer bit width of 32 or 64 do
8726 // not require fences because we can use amswap_db.[w/d].
8727 Type *Ty = I->getOperand(0)->getType();
8728 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8729 unsigned Size = Ty->getIntegerBitWidth();
8730 return (Size == 8 || Size == 16);
8731 }
8732
8733 return false;
8734}
8735
8737 LLVMContext &Context,
8738 EVT VT) const {
8739 if (!VT.isVector())
8740 return getPointerTy(DL);
8742}
8743
8745 EVT VT = Y.getValueType();
8746
8747 if (VT.isVector())
8748 return Subtarget.hasExtLSX() && VT.isInteger();
8749
8750 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
8751}
8752
8754 const CallInst &I,
8755 MachineFunction &MF,
8756 unsigned Intrinsic) const {
8757 switch (Intrinsic) {
8758 default:
8759 return false;
8760 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8761 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8762 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8763 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8764 Info.opc = ISD::INTRINSIC_W_CHAIN;
8765 Info.memVT = MVT::i32;
8766 Info.ptrVal = I.getArgOperand(0);
8767 Info.offset = 0;
8768 Info.align = Align(4);
8771 return true;
8772 // TODO: Add more Intrinsics later.
8773 }
8774}
8775
8776// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
8777// atomicrmw and/or/xor operations with operands narrower than 32 bits cannot
8778// be expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
8779// regression, we implement the expansion manually here.
8782
8784 Op == AtomicRMWInst::And) &&
8785 "Unable to expand");
8786 unsigned MinWordSize = 4;
8787
8788 IRBuilder<> Builder(AI);
8789 LLVMContext &Ctx = Builder.getContext();
8790 const DataLayout &DL = AI->getDataLayout();
8791 Type *ValueType = AI->getType();
8792 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8793
8794 Value *Addr = AI->getPointerOperand();
8795 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8796 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8797
8798 Value *AlignedAddr = Builder.CreateIntrinsic(
8799 Intrinsic::ptrmask, {PtrTy, IntTy},
8800 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8801 "AlignedAddr");
8802
8803 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8804 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8805 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8806 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8807 Value *Mask = Builder.CreateShl(
8808 ConstantInt::get(WordType,
8809 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8810 ShiftAmt, "Mask");
8811 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8812 Value *ValOperand_Shifted =
8813 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8814 ShiftAmt, "ValOperand_Shifted");
8815 Value *NewOperand;
8816 if (Op == AtomicRMWInst::And)
8817 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8818 else
8819 NewOperand = ValOperand_Shifted;
8820
8821 AtomicRMWInst *NewAI =
8822 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8823 AI->getOrdering(), AI->getSyncScopeID());
8824
8825 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8826 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8827 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8828 AI->replaceAllUsesWith(FinalOldResult);
8829 AI->eraseFromParent();
8830}
8831
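// Illustrative sketch (not part of the upstream file): the shift/mask
// arithmetic used by the manual expansion above, for a hypothetical i16
// atomicrmw whose address has its low two bits equal to 2.
static_assert((2u & 3u) * 8 == 16, "the half-word is shifted into bits [31:16]");
static_assert((((1u << 16) - 1) << 16) == 0xFFFF0000u,
              "Mask selects only the addressed half-word of the aligned word");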
8834 // TODO: Add more AtomicRMWInst that need to be extended.
8835
8836 // Since floating-point operations require a non-trivial set of data
8837 // operations, use CmpXChg to expand.
8838 if (AI->isFloatingPointOperation() ||
8844
8845 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8848 AI->getOperation() == AtomicRMWInst::Sub)) {
8850 }
8851
8852 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8853 if (Subtarget.hasLAMCAS()) {
8854 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8858 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8860 }
8861
8862 if (Size == 8 || Size == 16)
8865}
8866
8867static Intrinsic::ID
8869 AtomicRMWInst::BinOp BinOp) {
8870 if (GRLen == 64) {
8871 switch (BinOp) {
8872 default:
8873 llvm_unreachable("Unexpected AtomicRMW BinOp");
8875 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8876 case AtomicRMWInst::Add:
8877 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8878 case AtomicRMWInst::Sub:
8879 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8881 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8883 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8885 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8886 case AtomicRMWInst::Max:
8887 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8888 case AtomicRMWInst::Min:
8889 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8890 // TODO: support other AtomicRMWInst.
8891 }
8892 }
8893
8894 if (GRLen == 32) {
8895 switch (BinOp) {
8896 default:
8897 llvm_unreachable("Unexpected AtomicRMW BinOp");
8899 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8900 case AtomicRMWInst::Add:
8901 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8902 case AtomicRMWInst::Sub:
8903 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8905 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8907 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8909 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8910 case AtomicRMWInst::Max:
8911 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8912 case AtomicRMWInst::Min:
8913 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8914 // TODO: support other AtomicRMWInst.
8915 }
8916 }
8917
8918 llvm_unreachable("Unexpected GRLen\n");
8919}
8920
8923 AtomicCmpXchgInst *CI) const {
8924
8925 if (Subtarget.hasLAMCAS())
8927
8929 if (Size == 8 || Size == 16)
8932}
8933
8935 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8936 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8937 unsigned GRLen = Subtarget.getGRLen();
8938 AtomicOrdering FailOrd = CI->getFailureOrdering();
8939 Value *FailureOrdering =
8940 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8941 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8942 if (GRLen == 64) {
8943 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8944 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8945 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8946 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8947 }
8948 Type *Tys[] = {AlignedAddr->getType()};
8949 Value *Result = Builder.CreateIntrinsic(
8950 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8951 if (GRLen == 64)
8952 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8953 return Result;
8954}
8955
8957 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8958 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8959 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8960 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8961 // mask, as this produces better code than the LL/SC loop emitted by
8962 // int_loongarch_masked_atomicrmw_xchg.
8963 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8966 if (CVal->isZero())
8967 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8968 Builder.CreateNot(Mask, "Inv_Mask"),
8969 AI->getAlign(), Ord);
8970 if (CVal->isMinusOne())
8971 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8972 AI->getAlign(), Ord);
8973 }
8974
8975 unsigned GRLen = Subtarget.getGRLen();
8976 Value *Ordering =
8977 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8978 Type *Tys[] = {AlignedAddr->getType()};
8980 AI->getModule(),
8982
8983 if (GRLen == 64) {
8984 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8985 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8986 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8987 }
8988
8989 Value *Result;
8990
8991 // Must pass the shift amount needed to sign extend the loaded value prior
8992 // to performing a signed comparison for min/max. ShiftAmt is the number of
8993 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8994 // is the number of bits to left+right shift the value in order to
8995 // sign-extend.
8996 if (AI->getOperation() == AtomicRMWInst::Min ||
8998 const DataLayout &DL = AI->getDataLayout();
8999 unsigned ValWidth =
9000 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9001 Value *SextShamt =
9002 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9003 Result = Builder.CreateCall(LlwOpScwLoop,
9004 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9005 } else {
9006 Result =
9007 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9008 }
9009
9010 if (GRLen == 64)
9011 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9012 return Result;
9013}
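// Illustrative sketch (not part of the upstream file): the sign-extension
// shift amount computed above for a hypothetical i8 min/max atomicrmw at byte
// offset 1 on LA64 (GRLen == 64, ValWidth == 8, ShiftAmt == 8).
static_assert((64 - 8) - 8 == 48,
              "shifting left then arithmetically right by 48 sign-extends the "
              "byte held in bits [15:8] of the loaded word");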
9014
9016 const MachineFunction &MF, EVT VT) const {
9017 VT = VT.getScalarType();
9018
9019 if (!VT.isSimple())
9020 return false;
9021
9022 switch (VT.getSimpleVT().SimpleTy) {
9023 case MVT::f32:
9024 case MVT::f64:
9025 return true;
9026 default:
9027 break;
9028 }
9029
9030 return false;
9031}
9032
9034 const Constant *PersonalityFn) const {
9035 return LoongArch::R4;
9036}
9037
9039 const Constant *PersonalityFn) const {
9040 return LoongArch::R5;
9041}
9042
9043//===----------------------------------------------------------------------===//
9044// Target Optimization Hooks
9045//===----------------------------------------------------------------------===//
9046
9048 const LoongArchSubtarget &Subtarget) {
9049 // The relative accuracy of the FRECIPE feature's instructions is 2^-14.
9050 // IEEE single precision has 23 mantissa bits and double precision has 52.
9051 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9052 return RefinementSteps;
9053}
9054
9056 SelectionDAG &DAG, int Enabled,
9057 int &RefinementSteps,
9058 bool &UseOneConstNR,
9059 bool Reciprocal) const {
9060 if (Subtarget.hasFrecipe()) {
9061 SDLoc DL(Operand);
9062 EVT VT = Operand.getValueType();
9063
9064 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9065 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9066 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9067 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9068 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9069
9070 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9071 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9072
9073 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9074 if (Reciprocal)
9075 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9076
9077 return Estimate;
9078 }
9079 }
9080
9081 return SDValue();
9082}
9083
9085 SelectionDAG &DAG,
9086 int Enabled,
9087 int &RefinementSteps) const {
9088 if (Subtarget.hasFrecipe()) {
9089 SDLoc DL(Operand);
9090 EVT VT = Operand.getValueType();
9091
9092 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9093 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9094 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9095 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9096 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9097
9098 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9099 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9100
9101 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9102 }
9103 }
9104
9105 return SDValue();
9106}
9107
9108//===----------------------------------------------------------------------===//
9109// LoongArch Inline Assembly Support
9110//===----------------------------------------------------------------------===//
9111
9113LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9114 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9115 //
9116 // 'f': A floating-point register (if available).
9117 // 'k': A memory operand whose address is formed by a base register and
9118 // (optionally scaled) index register.
9119 // 'l': A signed 16-bit constant.
9120 // 'm': A memory operand whose address is formed by a base register and
9121 // offset that is suitable for use in instructions with the same
9122 // addressing mode as st.w and ld.w.
9123 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9124 // instruction)
9125 // 'I': A signed 12-bit constant (for arithmetic instructions).
9126 // 'J': Integer zero.
9127 // 'K': An unsigned 12-bit constant (for logic instructions).
9128 // "ZB": An address that is held in a general-purpose register. The offset is
9129 // zero.
9130 // "ZC": A memory operand whose address is formed by a base register and
9131 // offset that is suitable for use in instructions with the same
9132 // addressing mode as ll.w and sc.w.
9133 if (Constraint.size() == 1) {
9134 switch (Constraint[0]) {
9135 default:
9136 break;
9137 case 'f':
9138 case 'q':
9139 return C_RegisterClass;
9140 case 'l':
9141 case 'I':
9142 case 'J':
9143 case 'K':
9144 return C_Immediate;
9145 case 'k':
9146 return C_Memory;
9147 }
9148 }
9149
9150 if (Constraint == "ZC" || Constraint == "ZB")
9151 return C_Memory;
9152
9153 // 'm' is handled here.
9154 return TargetLowering::getConstraintType(Constraint);
9155}
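// Illustrative sketch (not part of the upstream file): how a few of the
// single-letter constraints classified above might appear in user inline
// assembly. The instruction and operand choices are hypothetical, and this
// only compiles when targeting LoongArch.
static inline int exampleConstraintUse(int Src) {
  int Res;
  asm("addi.w %0, %1, %2" : "=r"(Res) : "r"(Src), "I"(42)); // 'I': simm12
  return Res;
}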
9156
9157InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9158 StringRef ConstraintCode) const {
9159 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9163 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9164}
9165
9166std::pair<unsigned, const TargetRegisterClass *>
9167LoongArchTargetLowering::getRegForInlineAsmConstraint(
9168 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9169 // First, see if this is a constraint that directly corresponds to a LoongArch
9170 // register class.
9171 if (Constraint.size() == 1) {
9172 switch (Constraint[0]) {
9173 case 'r':
9174 // TODO: Support fixed vectors up to GRLen?
9175 if (VT.isVector())
9176 break;
9177 return std::make_pair(0U, &LoongArch::GPRRegClass);
9178 case 'q':
9179 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9180 case 'f':
9181 if (Subtarget.hasBasicF() && VT == MVT::f32)
9182 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9183 if (Subtarget.hasBasicD() && VT == MVT::f64)
9184 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9185 if (Subtarget.hasExtLSX() &&
9186 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9187 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9188 if (Subtarget.hasExtLASX() &&
9189 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9190 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9191 break;
9192 default:
9193 break;
9194 }
9195 }
9196
9197 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9198 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9199 // constraints while the official register name is prefixed with a '$'. So we
9200 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
9201 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
9202 // case insensitive, so no need to convert the constraint to upper case here.
9203 //
9204 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9205 // decode the usage of register name aliases into their official names. And
9206 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9207 // official register names.
9208 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9209 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9210 bool IsFP = Constraint[2] == 'f';
9211 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9212 std::pair<unsigned, const TargetRegisterClass *> R;
9214 TRI, join_items("", Temp.first, Temp.second), VT);
9215 // Match those names to the widest floating point register type available.
9216 if (IsFP) {
9217 unsigned RegNo = R.first;
9218 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9219 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9220 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9221 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9222 }
9223 }
9224 }
9225 return R;
9226 }
9227
9228 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9229}
9230
9231void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9232 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9233 SelectionDAG &DAG) const {
9234 // Currently only support length 1 constraints.
9235 if (Constraint.size() == 1) {
9236 switch (Constraint[0]) {
9237 case 'l':
9238 // Validate & create a 16-bit signed immediate operand.
9239 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9240 uint64_t CVal = C->getSExtValue();
9241 if (isInt<16>(CVal))
9242 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9243 Subtarget.getGRLenVT()));
9244 }
9245 return;
9246 case 'I':
9247 // Validate & create a 12-bit signed immediate operand.
9248 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9249 uint64_t CVal = C->getSExtValue();
9250 if (isInt<12>(CVal))
9251 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9252 Subtarget.getGRLenVT()));
9253 }
9254 return;
9255 case 'J':
9256 // Validate & create an integer zero operand.
9257 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9258 if (C->getZExtValue() == 0)
9259 Ops.push_back(
9260 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9261 return;
9262 case 'K':
9263 // Validate & create a 12-bit unsigned immediate operand.
9264 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9265 uint64_t CVal = C->getZExtValue();
9266 if (isUInt<12>(CVal))
9267 Ops.push_back(
9268 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9269 }
9270 return;
9271 default:
9272 break;
9273 }
9274 }
9276}
9277
9278#define GET_REGISTER_MATCHER
9279#include "LoongArchGenAsmMatcher.inc"
9280
9283 const MachineFunction &MF) const {
9284 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9285 std::string NewRegName = Name.second.str();
9286 Register Reg = MatchRegisterAltName(NewRegName);
9287 if (!Reg)
9288 Reg = MatchRegisterName(NewRegName);
9289 if (!Reg)
9290 return Reg;
9291 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9292 if (!ReservedRegs.test(Reg))
9293 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9294 StringRef(RegName) + "\"."));
9295 return Reg;
9296}
9297
9299 EVT VT, SDValue C) const {
9300 // TODO: Support vectors.
9301 if (!VT.isScalarInteger())
9302 return false;
9303
9304 // Omit the optimization if the data size exceeds GRLen.
9305 if (VT.getSizeInBits() > Subtarget.getGRLen())
9306 return false;
9307
9308 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9309 const APInt &Imm = ConstNode->getAPIntValue();
9310 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9311 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9312 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9313 return true;
9314 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9315 if (ConstNode->hasOneUse() &&
9316 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9317 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9318 return true;
9319 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9320 // in which the immediate has two set bits. Or Break (MUL x, imm)
9321 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9322 // equals to (1 << s0) - (1 << s1).
9323 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9324 unsigned Shifts = Imm.countr_zero();
9325 // Reject immediates which can be composed via a single LUI.
9326 if (Shifts >= 12)
9327 return false;
9328 // Reject multiplications that can be optimized to
9329 // (SLLI (ALSL x, x, 1/2/3/4), s).
9330 APInt ImmPop = Imm.ashr(Shifts);
9331 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9332 return false;
9333 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9334 // since it needs one more instruction than the other 3 cases.
9335 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9336 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9337 (ImmSmall - Imm).isPowerOf2())
9338 return true;
9339 }
9340 }
9341
9342 return false;
9343}
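// Illustrative sketch (not part of the upstream file): two of the
// decompositions accepted above, checked at compile time on a sample operand
// value of 7.
static_assert(7 * 17 == (7 << 4) + 7, "x*17 -> alsl x, x, 4 ((x<<4)+x)");
static_assert(7 * 10 == (7 << 3) + (7 << 1),
              "x*10 -> alsl of x<<1 onto x<<3 ((x<<3)+(x<<1))");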
9344
9346 const AddrMode &AM,
9347 Type *Ty, unsigned AS,
9348 Instruction *I) const {
9349 // LoongArch has four basic addressing modes:
9350 // 1. reg
9351 // 2. reg + 12-bit signed offset
9352 // 3. reg + 14-bit signed offset left-shifted by 2
9353 // 4. reg1 + reg2
9354 // TODO: Add more checks once the vector extension is supported.
9355
9356 // No global is ever allowed as a base.
9357 if (AM.BaseGV)
9358 return false;
9359
9360 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9361 // with `UAL` feature.
9362 if (!isInt<12>(AM.BaseOffs) &&
9363 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9364 return false;
9365
9366 switch (AM.Scale) {
9367 case 0:
9368 // "r+i" or just "i", depending on HasBaseReg.
9369 break;
9370 case 1:
9371 // "r+r+i" is not allowed.
9372 if (AM.HasBaseReg && AM.BaseOffs)
9373 return false;
9374 // Otherwise we have "r+r" or "r+i".
9375 break;
9376 case 2:
9377 // "2*r+r" or "2*r+i" is not allowed.
9378 if (AM.HasBaseReg || AM.BaseOffs)
9379 return false;
9380 // Allow "2*r" as "r+r".
9381 break;
9382 default:
9383 return false;
9384 }
9385
9386 return true;
9387}
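// Illustrative sketch (not part of the upstream file): concrete offsets for
// the immediate forms accepted above. 2040 fits the 12-bit signed offset,
// while 4096 only fits the 14-bit signed offset left-shifted by 2 (which the
// check above additionally gates on the UAL feature).
static_assert(2040 >= -2048 && 2040 <= 2047, "2040 fits simm12");
static_assert(4096 % 4 == 0 && (4096 >> 2) <= 8191, "4096 fits simm14 << 2");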
9388
9390 return isInt<12>(Imm);
9391}
9392
9394 return isInt<12>(Imm);
9395}
9396
9398 // Zexts are free if they can be combined with a load.
9399 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9400 // poorly with type legalization of compares preferring sext.
9401 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9402 EVT MemVT = LD->getMemoryVT();
9403 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9404 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9405 LD->getExtensionType() == ISD::ZEXTLOAD))
9406 return true;
9407 }
9408
9409 return TargetLowering::isZExtFree(Val, VT2);
9410}
9411
9413 EVT DstVT) const {
9414 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9415}
9416
9418 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9419}
9420
9422 // TODO: Support vectors.
9423 if (Y.getValueType().isVector())
9424 return false;
9425
9426 return !isa<ConstantSDNode>(Y);
9427}
9428
9430 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9431 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9432}
9433
9435 Type *Ty, bool IsSigned) const {
9436 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9437 return true;
9438
9439 return IsSigned;
9440}
9441
9443 // Return false to suppress unnecessary extensions if a LibCall argument or
9444 // return value is a float narrower than GRLen on a soft FP ABI.
9445 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9446 Type.getSizeInBits() < Subtarget.getGRLen()))
9447 return false;
9448 return true;
9449}
9450
9451 // memcpy and other memory intrinsics typically try to use wider loads/stores
9452 // if the source/dest is aligned and the copy size is large enough. We therefore
9453 // want to align such objects passed to memory intrinsics.
9455 unsigned &MinSize,
9456 Align &PrefAlign) const {
9457 if (!isa<MemIntrinsic>(CI))
9458 return false;
9459
9460 if (Subtarget.is64Bit()) {
9461 MinSize = 8;
9462 PrefAlign = Align(8);
9463 } else {
9464 MinSize = 4;
9465 PrefAlign = Align(4);
9466 }
9467
9468 return true;
9469}
9470
9479
9480bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9481 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9482 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9483 bool IsABIRegCopy = CC.has_value();
9484 EVT ValueVT = Val.getValueType();
9485
9486 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9487 PartVT == MVT::f32) {
9488 // Cast the [b]f16 to i16, extend to i32, pad the high bits with ones to make
9489 // a float NaN, and cast to f32.
9490 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9491 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9492 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9493 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9494 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9495 Parts[0] = Val;
9496 return true;
9497 }
9498
9499 return false;
9500}
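// Illustrative sketch (not part of the upstream file): the boxing above
// applied to the half-precision value 1.0 (bit pattern 0x3C00). The padded
// f32 pattern has an all-ones exponent, i.e. it is a NaN that carries the
// original f16 bits in its low half.
static_assert((0x3C00u | 0xFFFF0000u) == 0xFFFF3C00u, "f16 1.0 boxed into f32 bits");
static_assert(((0xFFFF3C00u >> 23) & 0xFFu) == 0xFFu, "exponent is all ones -> NaN");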
9501
9502SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9503 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9504 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9505 bool IsABIRegCopy = CC.has_value();
9506
9507 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9508 PartVT == MVT::f32) {
9509 SDValue Val = Parts[0];
9510
9511 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9512 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9513 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9514 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9515 return Val;
9516 }
9517
9518 return SDValue();
9519}
9520
9521MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9522 CallingConv::ID CC,
9523 EVT VT) const {
9524 // Use f32 to pass f16.
9525 if (VT == MVT::f16 && Subtarget.hasBasicF())
9526 return MVT::f32;
9527
9529}
9530
9531unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9532 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9533 // Use f32 to pass f16.
9534 if (VT == MVT::f16 && Subtarget.hasBasicF())
9535 return 1;
9536
9538}
9539
9541 SDValue Op, const APInt &OriginalDemandedBits,
9542 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9543 unsigned Depth) const {
9544 EVT VT = Op.getValueType();
9545 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9546 unsigned Opc = Op.getOpcode();
9547 switch (Opc) {
9548 default:
9549 break;
9552 SDValue Src = Op.getOperand(0);
9553 MVT SrcVT = Src.getSimpleValueType();
9554 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9555 unsigned NumElts = SrcVT.getVectorNumElements();
9556
9557 // If we don't need the sign bits at all just return zero.
9558 if (OriginalDemandedBits.countr_zero() >= NumElts)
9559 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9560
9561 // Only demand the vector elements of the sign bits we need.
9562 APInt KnownUndef, KnownZero;
9563 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9564 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9565 TLO, Depth + 1))
9566 return true;
9567
9568 Known.Zero = KnownZero.zext(BitWidth);
9569 Known.Zero.setHighBits(BitWidth - NumElts);
9570
9571 // [X]VMSKLTZ only uses the MSB from each vector element.
9572 KnownBits KnownSrc;
9573 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9574 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9575 Depth + 1))
9576 return true;
9577
9578 if (KnownSrc.One[SrcBits - 1])
9579 Known.One.setLowBits(NumElts);
9580 else if (KnownSrc.Zero[SrcBits - 1])
9581 Known.Zero.setLowBits(NumElts);
9582
9583 // Attempt to avoid multi-use ops if we don't need anything from it.
9585 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9586 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9587 return false;
9588 }
9589 }
9590
9592 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9593}
9594
9596 unsigned Opc = VecOp.getOpcode();
9597
9598 // Assume target opcodes can't be scalarized.
9599 // TODO - do we have any exceptions?
9600 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9601 return false;
9602
9603 // If the vector op is not supported, try to convert to scalar.
9604 EVT VecVT = VecOp.getValueType();
9606 return true;
9607
9608 // If the vector op is supported, but the scalar op is not, the transform may
9609 // not be worthwhile.
9610 EVT ScalarVT = VecVT.getScalarType();
9611 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9612}
9613
9615 unsigned Index) const {
9617 return false;
9618
9619 // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
9620 return Index == 0;
9621}
9622
9624 unsigned Index) const {
9625 EVT EltVT = VT.getScalarType();
9626
9627 // Extracting a scalar FP value from index 0 of a vector is free.
9628 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9629}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
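As a hedged illustration of what "zeroable" means here (a sketch that only inspects BUILD_VECTOR sources; the helper name is hypothetical and the real routine recognises more patterns):

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Illustrative sketch: mark an element undef when its mask entry is undef, and
// zero when the selected source element is a known zero in a BUILD_VECTOR.
static void computeZeroableSketch(ArrayRef<int> Mask, SDValue V1, SDValue V2,
                                  APInt &KnownUndef, APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getZero(Size);
  for (int I = 0; I != Size; ++I) {
    int M = Mask[I];
    if (M < 0) {
      KnownUndef.setBit(I);
      continue;
    }
    SDValue Src = M < Size ? V1 : V2;
    if (auto *BV = dyn_cast<BuildVectorSDNode>(Src)) {
      SDValue Elt = BV->getOperand(M % Size);
      if (Elt.isUndef())
        KnownUndef.setBit(I);
      else if (isNullConstant(Elt) || isNullFPConstant(Elt))
        KnownZero.setBit(I);
    }
  }
}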
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
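A hedged sketch of such a per-lane repetition check (the name and simplifications are illustrative, not the in-tree code): every mask entry must stay inside its own lane and, reduced modulo the lane width, all lanes must agree on one repeated mask.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Illustrative sketch: NumLaneElts is the number of vector elements per lane
// (e.g. 4 for v8i32 with 128-bit lanes). Undef entries (-1) match anything;
// indices into the second operand (>= Size) are folded into the upper half of
// the per-lane mask so callers can still tell the operands apart.
static bool isRepeatedMaskSketch(int NumLaneElts, ArrayRef<int> Mask,
                                 SmallVectorImpl<int> &RepeatedMask) {
  int Size = Mask.size();
  RepeatedMask.assign(NumLaneElts, -1);
  for (int I = 0; I != Size; ++I) {
    int M = Mask[I];
    if (M < 0)
      continue;
    if ((M % Size) / NumLaneElts != I / NumLaneElts)
      return false; // This entry crosses lanes.
    int Local = M % NumLaneElts + (M < Size ? 0 : NumLaneElts);
    int &Slot = RepeatedMask[I % NumLaneElts];
    if (Slot < 0)
      Slot = Local;         // First non-undef entry for this slot.
    else if (Slot != Local)
      return false;         // The lanes disagree.
  }
  return true;
}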
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
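A minimal sketch of this predicate, assuming undef mask entries are encoded as -1 (the name is hypothetical, not the in-tree helper):

#include "llvm/ADT/ArrayRef.h"
using llvm::ArrayRef;

// Illustrative sketch: Mask[Pos..Pos+Size) must be undef (-1) or form the
// arithmetic sequence Low, Low+Step, Low+2*Step, ...
static bool isSequentialOrUndefInRangeSketch(ArrayRef<int> Mask, unsigned Pos,
                                             unsigned Size, int Low,
                                             int Step = 1) {
  for (unsigned I = Pos, E = Pos + Size; I != E; ++I, Low += Step)
    if (Mask[I] >= 0 && Mask[I] != Low)
      return false;
  return true;
}

For example, a v4i32 mask {4, 5, -1, 7} is sequential-or-undef over the whole range with Low = 4.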
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1432
bool isZero() const
Definition APFloat.h:1445
APInt bitcastToAPInt() const
Definition APFloat.h:1353
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
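As a hedged illustration of that equivalence (assuming an integer VT; the helper name buildNotBothWays is hypothetical):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Both values below represent the bitwise NOT of Val; getNOT is shorthand for
// the explicit XOR with an all-ones constant.
static SDValue buildNotBothWays(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                                EVT VT) {
  SDValue ViaHelper = DAG.getNOT(DL, Val, VT);
  SDValue ViaXor =
      DAG.getNode(ISD::XOR, DL, VT, Val, DAG.getAllOnesConstant(DL, VT));
  (void)ViaXor; // Builds a node equivalent to ViaHelper.
  return ViaHelper;
}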
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...