1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "loongarch-isel-lowering"
42
43STATISTIC(NumTailCalls, "Number of tail calls");
44
53
55 "loongarch-materialize-float-imm", cl::Hidden,
56 cl::desc("Maximum number of instructions used (including code sequence "
57 "to generate the value and moving the value to FPR) when "
58 "materializing floating-point immediates (default = 3)"),
60 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62 "Materialize FP immediate within 2 instructions"),
64 "Materialize FP immediate within 3 instructions"),
66 "Materialize FP immediate within 4 instructions"),
68 "Materialize FP immediate within 5 instructions"),
70 "Materialize FP immediate within 6 instructions "
71 "(behaves same as 5 on loongarch64)")));
72
73static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
74 cl::desc("Trap on integer division by zero."),
75 cl::init(false));
76
78 const LoongArchSubtarget &STI)
79 : TargetLowering(TM), Subtarget(STI) {
80
81 MVT GRLenVT = Subtarget.getGRLenVT();
82
83 // Set up the register classes.
84
85 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
86 if (Subtarget.hasBasicF())
87 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
88 if (Subtarget.hasBasicD())
89 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
90
91 static const MVT::SimpleValueType LSXVTs[] = {
92 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
93 static const MVT::SimpleValueType LASXVTs[] = {
94 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
95
96 if (Subtarget.hasExtLSX())
97 for (MVT VT : LSXVTs)
98 addRegisterClass(VT, &LoongArch::LSX128RegClass);
99
100 if (Subtarget.hasExtLASX())
101 for (MVT VT : LASXVTs)
102 addRegisterClass(VT, &LoongArch::LASX256RegClass);
103
104 // Set operations for LA32 and LA64.
105
107 MVT::i1, Promote);
108
115
118 GRLenVT, Custom);
119
121
122 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
123 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
126
127 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
128 setOperationAction(ISD::TRAP, MVT::Other, Legal);
129
133
134 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
135
136 // BITREV/REVB requires the 32S feature.
137 if (STI.has32S()) {
138 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
139 // we know which of sll and revb.2h is faster.
142
143 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
144 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
145 // and i32 could still be byte-swapped relatively cheaply.
147 } else {
155 }
156
157 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
159 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
163
166
167 // Set operations for LA64 only.
168
169 if (Subtarget.is64Bit()) {
176 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
187
191 Custom);
192 setOperationAction(ISD::LROUND, MVT::i32, Custom);
193 }
194
195 // Set operations for LA32 only.
196
197 if (!Subtarget.is64Bit()) {
203 if (Subtarget.hasBasicD())
204 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
205 }
206
207 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
208
209 static const ISD::CondCode FPCCToExpand[] = {
212
213 // Set operations for 'F' feature.
214
215 if (Subtarget.hasBasicF()) {
216 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
217 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
218 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
219 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
220 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
221
224 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
226 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
227 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
228 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
229 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
234 setOperationAction(ISD::FSIN, MVT::f32, Expand);
235 setOperationAction(ISD::FCOS, MVT::f32, Expand);
236 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
237 setOperationAction(ISD::FPOW, MVT::f32, Expand);
239 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
240 Subtarget.isSoftFPABI() ? LibCall : Custom);
241 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
242 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
244 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
245 Subtarget.isSoftFPABI() ? LibCall : Custom);
246
247 if (Subtarget.is64Bit())
248 setOperationAction(ISD::FRINT, MVT::f32, Legal);
249
250 if (!Subtarget.hasBasicD()) {
252 if (Subtarget.is64Bit()) {
255 }
256 }
257 }
258
259 // Set operations for 'D' feature.
260
261 if (Subtarget.hasBasicD()) {
262 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
265 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
268 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
269
272 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
276 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
277 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
278 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
280 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
282 setOperationAction(ISD::FSIN, MVT::f64, Expand);
283 setOperationAction(ISD::FCOS, MVT::f64, Expand);
284 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
285 setOperationAction(ISD::FPOW, MVT::f64, Expand);
287 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
288 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
289 Subtarget.isSoftFPABI() ? LibCall : Custom);
290 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
291 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
292 Subtarget.isSoftFPABI() ? LibCall : Custom);
293
294 if (Subtarget.is64Bit())
295 setOperationAction(ISD::FRINT, MVT::f64, Legal);
296 }
297
298 // Set operations for 'LSX' feature.
299
300 if (Subtarget.hasExtLSX()) {
302 // Expand all truncating stores and extending loads.
303 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
304 setTruncStoreAction(VT, InnerVT, Expand);
307 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
308 }
309 // By default everything must be expanded. Then we will selectively turn
310 // on ones that can be effectively codegen'd.
311 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
313 }
314
315 for (MVT VT : LSXVTs) {
316 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
317 setOperationAction(ISD::BITCAST, VT, Legal);
319
323
328 }
329 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
332 Legal);
334 VT, Legal);
341 Expand);
350 }
351 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
353 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
355 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
358 }
359 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
363 setOperationAction(ISD::FSQRT, VT, Legal);
364 setOperationAction(ISD::FNEG, VT, Legal);
367 VT, Expand);
369 }
371 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
372 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
373 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
374 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
375
376 for (MVT VT :
377 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
378 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
380 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
381 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
382 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
383 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
384 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
385 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
386 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
387 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
388 }
389 }
390
391 // Set operations for 'LASX' feature.
392
393 if (Subtarget.hasExtLASX()) {
394 for (MVT VT : LASXVTs) {
395 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
396 setOperationAction(ISD::BITCAST, VT, Legal);
398
404
408 }
409 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
412 Legal);
414 VT, Legal);
421 Expand);
430 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
431 }
432 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
434 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
436 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
439 }
440 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
444 setOperationAction(ISD::FSQRT, VT, Legal);
445 setOperationAction(ISD::FNEG, VT, Legal);
448 VT, Expand);
450 }
451 }
452
453 // Set DAG combine for LA32 and LA64.
454
459
460 // Set DAG combine for 'LSX' feature.
461
462 if (Subtarget.hasExtLSX()) {
464 setTargetDAGCombine(ISD::BITCAST);
465 }
466
467 // Set DAG combine for 'LASX' feature.
468
469 if (Subtarget.hasExtLASX())
471
472 // Compute derived properties from the register classes.
473 computeRegisterProperties(Subtarget.getRegisterInfo());
474
476
479
480 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
481
483
484 // Function alignments.
486 // Set preferred alignments.
487 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
488 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
489 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
490
491 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
492 if (Subtarget.hasLAMCAS())
494
495 if (Subtarget.hasSCQ()) {
497 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
498 }
499}
500
502 const GlobalAddressSDNode *GA) const {
503 // In order to maximise the opportunity for common subexpression elimination,
504 // keep a separate ADD node for the global address offset instead of folding
505 // it in the global address node. Later peephole optimisations may choose to
506 // fold it back in when profitable.
507 return false;
508}
509
511 SelectionDAG &DAG) const {
512 switch (Op.getOpcode()) {
513 case ISD::ATOMIC_FENCE:
514 return lowerATOMIC_FENCE(Op, DAG);
516 return lowerEH_DWARF_CFA(Op, DAG);
518 return lowerGlobalAddress(Op, DAG);
520 return lowerGlobalTLSAddress(Op, DAG);
522 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
524 return lowerINTRINSIC_W_CHAIN(Op, DAG);
526 return lowerINTRINSIC_VOID(Op, DAG);
528 return lowerBlockAddress(Op, DAG);
529 case ISD::JumpTable:
530 return lowerJumpTable(Op, DAG);
531 case ISD::SHL_PARTS:
532 return lowerShiftLeftParts(Op, DAG);
533 case ISD::SRA_PARTS:
534 return lowerShiftRightParts(Op, DAG, true);
535 case ISD::SRL_PARTS:
536 return lowerShiftRightParts(Op, DAG, false);
538 return lowerConstantPool(Op, DAG);
539 case ISD::FP_TO_SINT:
540 return lowerFP_TO_SINT(Op, DAG);
541 case ISD::BITCAST:
542 return lowerBITCAST(Op, DAG);
543 case ISD::UINT_TO_FP:
544 return lowerUINT_TO_FP(Op, DAG);
545 case ISD::SINT_TO_FP:
546 return lowerSINT_TO_FP(Op, DAG);
547 case ISD::VASTART:
548 return lowerVASTART(Op, DAG);
549 case ISD::FRAMEADDR:
550 return lowerFRAMEADDR(Op, DAG);
551 case ISD::RETURNADDR:
552 return lowerRETURNADDR(Op, DAG);
554 return lowerWRITE_REGISTER(Op, DAG);
556 return lowerINSERT_VECTOR_ELT(Op, DAG);
558 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
560 return lowerBUILD_VECTOR(Op, DAG);
562 return lowerCONCAT_VECTORS(Op, DAG);
564 return lowerVECTOR_SHUFFLE(Op, DAG);
565 case ISD::BITREVERSE:
566 return lowerBITREVERSE(Op, DAG);
568 return lowerSCALAR_TO_VECTOR(Op, DAG);
569 case ISD::PREFETCH:
570 return lowerPREFETCH(Op, DAG);
571 case ISD::SELECT:
572 return lowerSELECT(Op, DAG);
573 case ISD::BRCOND:
574 return lowerBRCOND(Op, DAG);
575 case ISD::FP_TO_FP16:
576 return lowerFP_TO_FP16(Op, DAG);
577 case ISD::FP16_TO_FP:
578 return lowerFP16_TO_FP(Op, DAG);
579 case ISD::FP_TO_BF16:
580 return lowerFP_TO_BF16(Op, DAG);
581 case ISD::BF16_TO_FP:
582 return lowerBF16_TO_FP(Op, DAG);
583 case ISD::VECREDUCE_ADD:
584 return lowerVECREDUCE_ADD(Op, DAG);
585 case ISD::VECREDUCE_AND:
586 case ISD::VECREDUCE_OR:
587 case ISD::VECREDUCE_XOR:
588 case ISD::VECREDUCE_SMAX:
589 case ISD::VECREDUCE_SMIN:
590 case ISD::VECREDUCE_UMAX:
591 case ISD::VECREDUCE_UMIN:
592 return lowerVECREDUCE(Op, DAG);
593 case ISD::ConstantFP:
594 return lowerConstantFP(Op, DAG);
595 }
596 return SDValue();
597}
598
599SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
600 SelectionDAG &DAG) const {
601 EVT VT = Op.getValueType();
603 const APFloat &FPVal = CFP->getValueAPF();
604 SDLoc DL(CFP);
605
606 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
607 (VT == MVT::f64 && Subtarget.hasBasicD()));
608
609 // If value is 0.0 or -0.0, just ignore it.
610 if (FPVal.isZero())
611 return SDValue();
612
613 // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
614 if (isFPImmVLDILegal(FPVal, VT))
615 return SDValue();
616
617 // Construct the value as an integer, then move it to a float register.
618 APInt INTVal = FPVal.bitcastToAPInt();
619
620 // If more than MaterializeFPImmInsNum instructions would be used to
621 // generate INTVal and move it to a float register, fall back to a
622 // floating-point load from the constant pool.
624 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
625 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
626 return SDValue();
627
628 switch (VT.getSimpleVT().SimpleTy) {
629 default:
630 llvm_unreachable("Unexpected floating point type!");
631 break;
632 case MVT::f32: {
633 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
634 if (Subtarget.is64Bit())
635 NewVal = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, NewVal);
636 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
638 DL, VT, NewVal);
639 }
640 case MVT::f64: {
641 if (Subtarget.is64Bit()) {
642 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
643 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
644 }
645 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
646 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
647 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
648 }
649 }
650
651 return SDValue();
652}
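// Worked example (illustrative, assuming LA64 with the F feature): the f32
// constant 1.5 has the bit pattern 0x3FC00000, so it can be materialized as
//   lu12i.w    $a0, 0x3fc00       # $a0 = 0x3FC00000
//   movgr2fr.w $fa0, $a0
// i.e. two instructions, which stays within the default threshold of 3 and
// avoids a constant-pool load.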
653
654// Lower vecreduce_add using vhaddw instructions.
655// For Example:
656// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
657// can be lowered to:
658// VHADDW_D_W vr0, vr0, vr0
659// VHADDW_Q_D vr0, vr0, vr0
660// VPICKVE2GR_D a0, vr0, 0
661// ADDI_W a0, a0, 0
662SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
663 SelectionDAG &DAG) const {
664
665 SDLoc DL(Op);
666 MVT OpVT = Op.getSimpleValueType();
667 SDValue Val = Op.getOperand(0);
668
669 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
670 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
671 unsigned ResBits = OpVT.getScalarSizeInBits();
672
673 unsigned LegalVecSize = 128;
674 bool isLASX256Vector =
675 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
676
677 // Ensure the operand type is legal, widening it if necessary.
678 while (!isTypeLegal(Val.getSimpleValueType())) {
679 Val = DAG.WidenVector(Val, DL);
680 }
681
682 // NumEles is used as the iteration count; v4i32 for LSX and v8i32 for
683 // LASX should iterate the same number of times.
684 if (isLASX256Vector) {
685 NumEles /= 2;
686 LegalVecSize = 256;
687 }
688
689 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
690 MVT IntTy = MVT::getIntegerVT(EleBits);
691 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
692 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
693 }
694
695 if (isLASX256Vector) {
696 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
697 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
698 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
699 }
700
701 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
702 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
703 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
704}
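// Illustrative trace (a sketch, not emitted verbatim): reducing a v8i16 with
// LSX performs three widening horizontal adds, 8 x i16 -> 4 x i32 -> 2 x i64
// -> one i128 partial sum, roughly:
//   VHADDW_W_H vr0, vr0, vr0
//   VHADDW_D_W vr0, vr0, vr0
//   VHADDW_Q_D vr0, vr0, vr0
// after which the low element is extracted as the scalar result.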
705
706// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
707// For Example:
708// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
709// can be lowered to:
710// VBSRL_V vr1, vr0, 8
711// VMAX_W vr0, vr1, vr0
712// VBSRL_V vr1, vr0, 4
713// VMAX_W vr0, vr1, vr0
714// VPICKVE2GR_W a0, vr0, 0
715// For a 256-bit vector, the operation is illegal and is split into
716// two 128-bit vectors by default, which are then processed here.
717SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
718 SelectionDAG &DAG) const {
719 SDLoc DL(Op);
720
721 MVT OpVT = Op.getSimpleValueType();
722 SDValue Val = Op.getOperand(0);
723
724 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
725 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
726
727 // Ensure the operand type is legal, widening it if necessary.
728 while (!isTypeLegal(Val.getSimpleValueType())) {
729 Val = DAG.WidenVector(Val, DL);
730 }
731
732 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
733 MVT VecTy = Val.getSimpleValueType();
734 MVT GRLenVT = Subtarget.getGRLenVT();
735
736 for (int i = NumEles; i > 1; i /= 2) {
737 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
738 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
739 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
740 }
741
742 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
743 DAG.getConstant(0, DL, GRLenVT));
744}
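// Illustrative trace (a sketch, assuming a umax reduction of v16i8 on LSX):
// each loop iteration halves the number of live elements by pairing them with
// a byte shift, giving VBSRL_V shift amounts of 8, 4, 2 and 1 bytes:
//   VBSRL_V vr1, vr0, 8 ; VMAX_BU vr0, vr1, vr0
//   VBSRL_V vr1, vr0, 4 ; VMAX_BU vr0, vr1, vr0
//   VBSRL_V vr1, vr0, 2 ; VMAX_BU vr0, vr1, vr0
//   VBSRL_V vr1, vr0, 1 ; VMAX_BU vr0, vr1, vr0
// and element 0 then holds the final maximum.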
745
746SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
747 SelectionDAG &DAG) const {
748 unsigned IsData = Op.getConstantOperandVal(4);
749
750 // We don't support non-data prefetch.
751 // Just preserve the chain.
752 if (!IsData)
753 return Op.getOperand(0);
754
755 return Op;
756}
757
758// Return true if Val is equal to (setcc LHS, RHS, CC).
759// Return false if Val is the inverse of (setcc LHS, RHS, CC).
760// Otherwise, return std::nullopt.
761static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
762 ISD::CondCode CC, SDValue Val) {
763 assert(Val->getOpcode() == ISD::SETCC);
764 SDValue LHS2 = Val.getOperand(0);
765 SDValue RHS2 = Val.getOperand(1);
766 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
767
768 if (LHS == LHS2 && RHS == RHS2) {
769 if (CC == CC2)
770 return true;
771 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
772 return false;
773 } else if (LHS == RHS2 && RHS == LHS2) {
775 if (CC == CC2)
776 return true;
777 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
778 return false;
779 }
780
781 return std::nullopt;
782}
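// Illustrative example (hypothetical values %a, %b, %c): with LHS = %a,
// RHS = %b and CC = setlt, a Val of (setcc %a, %b, setlt) matches and returns
// true, (setcc %a, %b, setge) is the inverse and returns false, and an
// unrelated comparison such as (setcc %a, %c, setlt) returns std::nullopt.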
783
785 const LoongArchSubtarget &Subtarget) {
786 SDValue CondV = N->getOperand(0);
787 SDValue TrueV = N->getOperand(1);
788 SDValue FalseV = N->getOperand(2);
789 MVT VT = N->getSimpleValueType(0);
790 SDLoc DL(N);
791
792 // (select c, -1, y) -> -c | y
793 if (isAllOnesConstant(TrueV)) {
794 SDValue Neg = DAG.getNegative(CondV, DL, VT);
795 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
796 }
797 // (select c, y, -1) -> (c-1) | y
798 if (isAllOnesConstant(FalseV)) {
799 SDValue Neg =
800 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
801 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
802 }
803
804 // (select c, 0, y) -> (c-1) & y
805 if (isNullConstant(TrueV)) {
806 SDValue Neg =
807 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
808 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
809 }
810 // (select c, y, 0) -> -c & y
811 if (isNullConstant(FalseV)) {
812 SDValue Neg = DAG.getNegative(CondV, DL, VT);
813 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
814 }
815
816 // select c, ~x, x --> xor -c, x
817 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
818 const APInt &TrueVal = TrueV->getAsAPIntVal();
819 const APInt &FalseVal = FalseV->getAsAPIntVal();
820 if (~TrueVal == FalseVal) {
821 SDValue Neg = DAG.getNegative(CondV, DL, VT);
822 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
823 }
824 }
825
826 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
827 // when both truev and falsev are also setcc.
828 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
829 FalseV.getOpcode() == ISD::SETCC) {
830 SDValue LHS = CondV.getOperand(0);
831 SDValue RHS = CondV.getOperand(1);
832 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
833
834 // (select x, x, y) -> x | y
835 // (select !x, x, y) -> x & y
836 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
837 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
838 DAG.getFreeze(FalseV));
839 }
840 // (select x, y, x) -> x & y
841 // (select !x, y, x) -> x | y
842 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
843 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
844 DAG.getFreeze(TrueV), FalseV);
845 }
846 }
847
848 return SDValue();
849}
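// Illustrative example (a sketch, assuming CondV is a 0/1 value of the result
// type): (select CondV, -1, y) becomes (or (neg CondV), y) and
// (select CondV, 0, y) becomes (and (add CondV, -1), y), trading the select
// for a couple of cheap arithmetic/bitwise operations.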
850
851// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
852// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
853// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
854// being `0` or `-1`. In such cases we can replace `select` with `and`.
855// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
856// than `c0`?
857static SDValue
859 const LoongArchSubtarget &Subtarget) {
860 unsigned SelOpNo = 0;
861 SDValue Sel = BO->getOperand(0);
862 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
863 SelOpNo = 1;
864 Sel = BO->getOperand(1);
865 }
866
867 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
868 return SDValue();
869
870 unsigned ConstSelOpNo = 1;
871 unsigned OtherSelOpNo = 2;
872 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
873 ConstSelOpNo = 2;
874 OtherSelOpNo = 1;
875 }
876 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
877 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
878 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
879 return SDValue();
880
881 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
882 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
883 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
884 return SDValue();
885
886 SDLoc DL(Sel);
887 EVT VT = BO->getValueType(0);
888
889 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
890 if (SelOpNo == 1)
891 std::swap(NewConstOps[0], NewConstOps[1]);
892
893 SDValue NewConstOp =
894 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
895 if (!NewConstOp)
896 return SDValue();
897
898 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
899 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
900 return SDValue();
901
902 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
903 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
904 if (SelOpNo == 1)
905 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
906 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
907
908 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
909 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
910 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
911}
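// Illustrative example (hypothetical constants): for
//   (add (select cond, x, -1), 1)
// the folded constant is (add -1, 1) = 0, so the node is rewritten as
//   (select cond, (add x, 1), 0)
// which combineSelectToBinOp can then turn into a plain AND.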
912
913// Changes the condition code and swaps operands if necessary, so the SetCC
914// operation matches one of the comparisons supported directly by branches
915// in the LoongArch ISA. May adjust compares to favor compare with 0 over
916// compare with 1/-1.
918 ISD::CondCode &CC, SelectionDAG &DAG) {
919 // If this is a single bit test that can't be handled by ANDI, shift the
920 // bit to be tested to the MSB and perform a signed compare with 0.
921 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
922 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
923 isa<ConstantSDNode>(LHS.getOperand(1))) {
924 uint64_t Mask = LHS.getConstantOperandVal(1);
925 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
926 unsigned ShAmt = 0;
927 if (isPowerOf2_64(Mask)) {
928 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
929 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
930 } else {
931 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
932 }
933
934 LHS = LHS.getOperand(0);
935 if (ShAmt != 0)
936 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
937 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
938 return;
939 }
940 }
941
942 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
943 int64_t C = RHSC->getSExtValue();
944 switch (CC) {
945 default:
946 break;
947 case ISD::SETGT:
948 // Convert X > -1 to X >= 0.
949 if (C == -1) {
950 RHS = DAG.getConstant(0, DL, RHS.getValueType());
951 CC = ISD::SETGE;
952 return;
953 }
954 break;
955 case ISD::SETLT:
956 // Convert X < 1 to 0 >= X.
957 if (C == 1) {
958 RHS = LHS;
959 LHS = DAG.getConstant(0, DL, RHS.getValueType());
960 CC = ISD::SETGE;
961 return;
962 }
963 break;
964 }
965 }
966
967 switch (CC) {
968 default:
969 break;
970 case ISD::SETGT:
971 case ISD::SETLE:
972 case ISD::SETUGT:
973 case ISD::SETULE:
975 std::swap(LHS, RHS);
976 break;
977 }
978}
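// Illustrative example (a sketch, assuming GRLen == 64): the single-bit test
//   (setcc (and %x, 0x100000000), 0, seteq)
// cannot use ANDI because the mask does not fit in 12 bits, so the tested bit
// is shifted up to the MSB and the comparison becomes a signed compare with
// zero:
//   (setcc (shl %x, 31), 0, setge)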
979
980SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
981 SelectionDAG &DAG) const {
982 SDValue CondV = Op.getOperand(0);
983 SDValue TrueV = Op.getOperand(1);
984 SDValue FalseV = Op.getOperand(2);
985 SDLoc DL(Op);
986 MVT VT = Op.getSimpleValueType();
987 MVT GRLenVT = Subtarget.getGRLenVT();
988
989 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
990 return V;
991
992 if (Op.hasOneUse()) {
993 unsigned UseOpc = Op->user_begin()->getOpcode();
994 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
995 SDNode *BinOp = *Op->user_begin();
996 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
997 DAG, Subtarget)) {
998 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
999 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1000 // may return a constant node and cause a crash in lowerSELECT.
1001 if (NewSel.getOpcode() == ISD::SELECT)
1002 return lowerSELECT(NewSel, DAG);
1003 return NewSel;
1004 }
1005 }
1006 }
1007
1008 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1009 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1010 // (select condv, truev, falsev)
1011 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1012 if (CondV.getOpcode() != ISD::SETCC ||
1013 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1014 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1015 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1016
1017 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1018
1019 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1020 }
1021
1022 // If the CondV is the output of a SETCC node which operates on GRLenVT
1023 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1024 // to take advantage of the integer compare+branch instructions. i.e.: (select
1025 // (setcc lhs, rhs, cc), truev, falsev)
1026 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1027 SDValue LHS = CondV.getOperand(0);
1028 SDValue RHS = CondV.getOperand(1);
1029 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1030
1031 // Special case for a select of 2 constants that have a difference of 1.
1032 // Normally this is done by DAGCombine, but if the select is introduced by
1033 // type legalization or op legalization, we miss it. Restricting to SETLT
1034 // case for now because that is what signed saturating add/sub need.
1035 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1036 // but we would probably want to swap the true/false values if the condition
1037 // is SETGE/SETLE to avoid an XORI.
1038 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1039 CCVal == ISD::SETLT) {
1040 const APInt &TrueVal = TrueV->getAsAPIntVal();
1041 const APInt &FalseVal = FalseV->getAsAPIntVal();
1042 if (TrueVal - 1 == FalseVal)
1043 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1044 if (TrueVal + 1 == FalseVal)
1045 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1046 }
1047
1048 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1049 // 1 < x ? x : 1 -> 0 < x ? x : 1
1050 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1051 RHS == TrueV && LHS == FalseV) {
1052 LHS = DAG.getConstant(0, DL, VT);
1053 // 0 <u x is the same as x != 0.
1054 if (CCVal == ISD::SETULT) {
1055 std::swap(LHS, RHS);
1056 CCVal = ISD::SETNE;
1057 }
1058 }
1059
1060 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1061 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1062 RHS == FalseV) {
1063 RHS = DAG.getConstant(0, DL, VT);
1064 }
1065
1066 SDValue TargetCC = DAG.getCondCode(CCVal);
1067
1068 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1069 // (select (setcc lhs, rhs, CC), constant, falsev)
1070 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1071 std::swap(TrueV, FalseV);
1072 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1073 }
1074
1075 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1076 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1077}
1078
1079SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1080 SelectionDAG &DAG) const {
1081 SDValue CondV = Op.getOperand(1);
1082 SDLoc DL(Op);
1083 MVT GRLenVT = Subtarget.getGRLenVT();
1084
1085 if (CondV.getOpcode() == ISD::SETCC) {
1086 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1087 SDValue LHS = CondV.getOperand(0);
1088 SDValue RHS = CondV.getOperand(1);
1089 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1090
1091 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1092
1093 SDValue TargetCC = DAG.getCondCode(CCVal);
1094 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1095 Op.getOperand(0), LHS, RHS, TargetCC,
1096 Op.getOperand(2));
1097 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1098 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1099 Op.getOperand(0), CondV, Op.getOperand(2));
1100 }
1101 }
1102
1103 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1104 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1105 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1106}
1107
1108SDValue
1109LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1110 SelectionDAG &DAG) const {
1111 SDLoc DL(Op);
1112 MVT OpVT = Op.getSimpleValueType();
1113
1114 SDValue Vector = DAG.getUNDEF(OpVT);
1115 SDValue Val = Op.getOperand(0);
1116 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1117
1118 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1119}
1120
1121SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1122 SelectionDAG &DAG) const {
1123 EVT ResTy = Op->getValueType(0);
1124 SDValue Src = Op->getOperand(0);
1125 SDLoc DL(Op);
1126
1127 // LoongArchISD::BITREV_8B is not supported on LA32.
1128 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1129 return SDValue();
1130
1131 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1132 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1133 unsigned int NewEltNum = NewVT.getVectorNumElements();
1134
1135 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1136
1138 for (unsigned int i = 0; i < NewEltNum; i++) {
1139 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1140 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1141 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1142 ? (unsigned)LoongArchISD::BITREV_8B
1143 : (unsigned)ISD::BITREVERSE;
1144 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1145 }
1146 SDValue Res =
1147 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1148
1149 switch (ResTy.getSimpleVT().SimpleTy) {
1150 default:
1151 return SDValue();
1152 case MVT::v16i8:
1153 case MVT::v32i8:
1154 return Res;
1155 case MVT::v8i16:
1156 case MVT::v16i16:
1157 case MVT::v4i32:
1158 case MVT::v8i32: {
1160 for (unsigned int i = 0; i < NewEltNum; i++)
1161 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1162 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1163 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1164 }
1165 }
1166}
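// Illustrative example (a sketch, assuming LA64): for a v8i16 result, each
// i64 lane is bit-reversed with ISD::BITREVERSE, which also reverses the
// order of the four i16 elements inside that lane, so the final shuffle mask
// is <3, 2, 1, 0, 7, 6, 5, 4> and simply restores the original element order.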
1167
1168// Widen element type to get a new mask value (if possible).
1169// For example:
1170// shufflevector <4 x i32> %a, <4 x i32> %b,
1171// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1172// is equivalent to:
1173// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1174// can be lowered to:
1175// VPACKOD_D vr0, vr0, vr1
1177 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1178 unsigned EltBits = VT.getScalarSizeInBits();
1179
1180 if (EltBits > 32 || EltBits == 1)
1181 return SDValue();
1182
1183 SmallVector<int, 8> NewMask;
1184 if (widenShuffleMaskElts(Mask, NewMask)) {
1185 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1186 : MVT::getIntegerVT(EltBits * 2);
1187 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1188 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1189 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1190 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1191 return DAG.getBitcast(
1192 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1193 }
1194 }
1195
1196 return SDValue();
1197}
1198
1199/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1200/// instructions.
1201// The function matches elements from one of the input vectors shuffled to the
1202// left or right with zeroable elements 'shifted in'. It handles both the
1203// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1204// lane.
1205// Mostly copied from X86.
1206static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1207 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1208 int MaskOffset, const APInt &Zeroable) {
1209 int Size = Mask.size();
1210 unsigned SizeInBits = Size * ScalarSizeInBits;
1211
1212 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1213 for (int i = 0; i < Size; i += Scale)
1214 for (int j = 0; j < Shift; ++j)
1215 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1216 return false;
1217
1218 return true;
1219 };
1220
1221 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1222 int Step = 1) {
1223 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1224 if (!(Mask[i] == -1 || Mask[i] == Low))
1225 return false;
1226 return true;
1227 };
1228
1229 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1230 for (int i = 0; i != Size; i += Scale) {
1231 unsigned Pos = Left ? i + Shift : i;
1232 unsigned Low = Left ? i : i + Shift;
1233 unsigned Len = Scale - Shift;
1234 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1235 return -1;
1236 }
1237
1238 int ShiftEltBits = ScalarSizeInBits * Scale;
1239 bool ByteShift = ShiftEltBits > 64;
1240 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1241 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1242 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1243
1244 // Normalize the scale for byte shifts to still produce an i64 element
1245 // type.
1246 Scale = ByteShift ? Scale / 2 : Scale;
1247
1248 // We need to round trip through the appropriate type for the shift.
1249 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1250 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1251 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1252 return (int)ShiftAmt;
1253 };
1254
1255 unsigned MaxWidth = 128;
1256 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1257 for (int Shift = 1; Shift != Scale; ++Shift)
1258 for (bool Left : {true, false})
1259 if (CheckZeros(Shift, Scale, Left)) {
1260 int ShiftAmt = MatchShift(Shift, Scale, Left);
1261 if (0 < ShiftAmt)
1262 return ShiftAmt;
1263 }
1264
1265 // no match
1266 return -1;
1267}
1268
1269/// Lower VECTOR_SHUFFLE as shift (if possible).
1270///
1271/// For example:
1272/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1273/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1274/// is lowered to:
1275/// (VBSLL_V $v0, $v0, 4)
1276///
1277/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1278/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1279/// is lowered to:
1280/// (VSLLI_D $v0, $v0, 32)
1282 MVT VT, SDValue V1, SDValue V2,
1283 SelectionDAG &DAG,
1284 const LoongArchSubtarget &Subtarget,
1285 const APInt &Zeroable) {
1286 int Size = Mask.size();
1287 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1288
1289 MVT ShiftVT;
1290 SDValue V = V1;
1291 unsigned Opcode;
1292
1293 // Try to match shuffle against V1 shift.
1294 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1295 Mask, 0, Zeroable);
1296
1297 // If V1 failed, try to match shuffle against V2 shift.
1298 if (ShiftAmt < 0) {
1299 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1300 Mask, Size, Zeroable);
1301 V = V2;
1302 }
1303
1304 if (ShiftAmt < 0)
1305 return SDValue();
1306
1307 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1308 "Illegal integer vector type");
1309 V = DAG.getBitcast(ShiftVT, V);
1310 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1311 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1312 return DAG.getBitcast(VT, V);
1313}
1314
1315/// Determine whether a range fits a regular pattern of values.
1316/// This function accounts for the possibility of jumping over the End iterator.
1317template <typename ValType>
1318static bool
1320 unsigned CheckStride,
1322 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1323 auto &I = Begin;
1324
1325 while (I != End) {
1326 if (*I != -1 && *I != ExpectedIndex)
1327 return false;
1328 ExpectedIndex += ExpectedIndexStride;
1329
1330 // Incrementing past End is undefined behaviour so we must increment one
1331 // step at a time and check for End at each step.
1332 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1333 ; // Empty loop body.
1334 }
1335 return true;
1336}
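// Illustrative example (hypothetical mask): for the v4i32 shuffle mask
// <0, 4, 1, 5>, fitsRegularPattern<int>(Begin, 2, End, 0, 1) checks positions
// 0 and 2 against the sequence 0, 1 and succeeds, while
// fitsRegularPattern<int>(Begin + 1, 2, End, 4, 1) checks positions 1 and 3
// against 4, 5 and also succeeds, which is exactly the VILVL form below.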
1337
1338/// Compute whether each element of a shuffle is zeroable.
1339///
1340/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1342 SDValue V2, APInt &KnownUndef,
1343 APInt &KnownZero) {
1344 int Size = Mask.size();
1345 KnownUndef = KnownZero = APInt::getZero(Size);
1346
1347 V1 = peekThroughBitcasts(V1);
1348 V2 = peekThroughBitcasts(V2);
1349
1350 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1351 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1352
1353 int VectorSizeInBits = V1.getValueSizeInBits();
1354 int ScalarSizeInBits = VectorSizeInBits / Size;
1355 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1356 (void)ScalarSizeInBits;
1357
1358 for (int i = 0; i < Size; ++i) {
1359 int M = Mask[i];
1360 if (M < 0) {
1361 KnownUndef.setBit(i);
1362 continue;
1363 }
1364 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1365 KnownZero.setBit(i);
1366 continue;
1367 }
1368 }
1369}
1370
1371/// Test whether a shuffle mask is equivalent within each sub-lane.
1372///
1373/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1374/// non-trivial to compute in the face of undef lanes. The representation is
1375/// suitable for use with existing 128-bit shuffles as entries from the second
1376/// vector have been remapped to [LaneSize, 2*LaneSize).
1377static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1378 ArrayRef<int> Mask,
1379 SmallVectorImpl<int> &RepeatedMask) {
1380 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1381 RepeatedMask.assign(LaneSize, -1);
1382 int Size = Mask.size();
1383 for (int i = 0; i < Size; ++i) {
1384 assert(Mask[i] == -1 || Mask[i] >= 0);
1385 if (Mask[i] < 0)
1386 continue;
1387 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1388 // This entry crosses lanes, so there is no way to model this shuffle.
1389 return false;
1390
1391 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1392 // Adjust second vector indices to start at LaneSize instead of Size.
1393 int LocalM =
1394 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1395 if (RepeatedMask[i % LaneSize] < 0)
1396 // This is the first non-undef entry in this slot of a 128-bit lane.
1397 RepeatedMask[i % LaneSize] = LocalM;
1398 else if (RepeatedMask[i % LaneSize] != LocalM)
1399 // Found a mismatch with the repeated mask.
1400 return false;
1401 }
1402 return true;
1403}
1404
1405/// Attempts to match vector shuffle as byte rotation.
1407 ArrayRef<int> Mask) {
1408
1409 SDValue Lo, Hi;
1410 SmallVector<int, 16> RepeatedMask;
1411
1412 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1413 return -1;
1414
1415 int NumElts = RepeatedMask.size();
1416 int Rotation = 0;
1417 int Scale = 16 / NumElts;
1418
1419 for (int i = 0; i < NumElts; ++i) {
1420 int M = RepeatedMask[i];
1421 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1422 "Unexpected mask index.");
1423 if (M < 0)
1424 continue;
1425
1426 // Determine where a rotated vector would have started.
1427 int StartIdx = i - (M % NumElts);
1428 if (StartIdx == 0)
1429 return -1;
1430
1431 // If we found the tail of a vector the rotation must be the missing
1432 // front. If we found the head of a vector, it must be how much of the
1433 // head.
1434 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1435
1436 if (Rotation == 0)
1437 Rotation = CandidateRotation;
1438 else if (Rotation != CandidateRotation)
1439 return -1;
1440
1441 // Compute which value this mask is pointing at.
1442 SDValue MaskV = M < NumElts ? V1 : V2;
1443
1444 // Compute which of the two target values this index should be assigned
1445 // to. This reflects whether the high elements are remaining or the low
1446 // elements are remaining.
1447 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1448
1449 // Either set up this value if we've not encountered it before, or check
1450 // that it remains consistent.
1451 if (!TargetV)
1452 TargetV = MaskV;
1453 else if (TargetV != MaskV)
1454 return -1;
1455 }
1456
1457 // Check that we successfully analyzed the mask, and normalize the results.
1458 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1459 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1460 if (!Lo)
1461 Lo = Hi;
1462 else if (!Hi)
1463 Hi = Lo;
1464
1465 V1 = Lo;
1466 V2 = Hi;
1467
1468 return Rotation * Scale;
1469}
1470
1471/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1472///
1473/// For example:
1474/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1475/// <2 x i32> <i32 3, i32 0>
1476/// is lowered to:
1477/// (VBSRL_V $v1, $v1, 8)
1478/// (VBSLL_V $v0, $v0, 8)
1479/// (VOR_V $v0, $V0, $v1)
1480static SDValue
1482 SDValue V1, SDValue V2, SelectionDAG &DAG,
1483 const LoongArchSubtarget &Subtarget) {
1484
1485 SDValue Lo = V1, Hi = V2;
1486 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1487 if (ByteRotation <= 0)
1488 return SDValue();
1489
1490 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1491 Lo = DAG.getBitcast(ByteVT, Lo);
1492 Hi = DAG.getBitcast(ByteVT, Hi);
1493
1494 int LoByteShift = 16 - ByteRotation;
1495 int HiByteShift = ByteRotation;
1496 MVT GRLenVT = Subtarget.getGRLenVT();
1497
1498 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1499 DAG.getConstant(LoByteShift, DL, GRLenVT));
1500 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1501 DAG.getConstant(HiByteShift, DL, GRLenVT));
1502 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1503}
1504
1505/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1506///
1507/// For example:
1508/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1509/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1510/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1511/// is lowered to:
1512/// (VREPLI $v1, 0)
1513/// (VILVL $v0, $v1, $v0)
1515 ArrayRef<int> Mask, MVT VT,
1516 SDValue V1, SDValue V2,
1517 SelectionDAG &DAG,
1518 const APInt &Zeroable) {
1519 int Bits = VT.getSizeInBits();
1520 int EltBits = VT.getScalarSizeInBits();
1521 int NumElements = VT.getVectorNumElements();
1522
1523 if (Zeroable.isAllOnes())
1524 return DAG.getConstant(0, DL, VT);
1525
1526 // Define a helper function to check a particular ext-scale and lower to it if
1527 // valid.
1528 auto Lower = [&](int Scale) -> SDValue {
1529 SDValue InputV;
1530 bool AnyExt = true;
1531 int Offset = 0;
1532 for (int i = 0; i < NumElements; i++) {
1533 int M = Mask[i];
1534 if (M < 0)
1535 continue;
1536 if (i % Scale != 0) {
1537 // Each of the extended elements need to be zeroable.
1538 if (!Zeroable[i])
1539 return SDValue();
1540
1541 AnyExt = false;
1542 continue;
1543 }
1544
1545 // Each of the base elements needs to be consecutive indices into the
1546 // same input vector.
1547 SDValue V = M < NumElements ? V1 : V2;
1548 M = M % NumElements;
1549 if (!InputV) {
1550 InputV = V;
1551 Offset = M - (i / Scale);
1552
1553 // These offsets can't be handled.
1554 if (Offset % (NumElements / Scale))
1555 return SDValue();
1556 } else if (InputV != V)
1557 return SDValue();
1558
1559 if (M != (Offset + (i / Scale)))
1560 return SDValue(); // Non-consecutive strided elements.
1561 }
1562
1563 // If we fail to find an input, we have a zero-shuffle which should always
1564 // have already been handled.
1565 if (!InputV)
1566 return SDValue();
1567
1568 do {
1569 unsigned VilVLoHi = LoongArchISD::VILVL;
1570 if (Offset >= (NumElements / 2)) {
1571 VilVLoHi = LoongArchISD::VILVH;
1572 Offset -= (NumElements / 2);
1573 }
1574
1575 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1576 SDValue Ext =
1577 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1578 InputV = DAG.getBitcast(InputVT, InputV);
1579 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1580 Scale /= 2;
1581 EltBits *= 2;
1582 NumElements /= 2;
1583 } while (Scale > 1);
1584 return DAG.getBitcast(VT, InputV);
1585 };
1586
1587 // Each iteration, try extending the elements half as much, but into twice as
1588 // many elements.
1589 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1590 NumExtElements *= 2) {
1591 if (SDValue V = Lower(NumElements / NumExtElements))
1592 return V;
1593 }
1594 return SDValue();
1595}
1596
1597/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1598///
1599/// VREPLVEI performs vector broadcast based on an element specified by an
1600/// integer immediate, with its mask being similar to:
1601/// <x, x, x, ...>
1602/// where x is any valid index.
1603///
1604/// When undef's appear in the mask they are treated as if they were whatever
1605/// value is necessary in order to fit the above form.
1606static SDValue
1608 SDValue V1, SelectionDAG &DAG,
1609 const LoongArchSubtarget &Subtarget) {
1610 int SplatIndex = -1;
1611 for (const auto &M : Mask) {
1612 if (M != -1) {
1613 SplatIndex = M;
1614 break;
1615 }
1616 }
1617
1618 if (SplatIndex == -1)
1619 return DAG.getUNDEF(VT);
1620
1621 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1622 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1623 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1624 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1625 }
1626
1627 return SDValue();
1628}
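// Illustrative example (hypothetical mask): a v4i32 shuffle with mask
// <2, undef, 2, 2> splats element 2 of the first operand and is lowered to
//   (VREPLVEI_W $v0, $v0, 2)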
1629
1630/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1631///
1632/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1633/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1634///
1635/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1636/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1637/// When undef's appear they are treated as if they were whatever value is
1638/// necessary in order to fit the above forms.
1639///
1640/// For example:
1641/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1642/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1643/// i32 7, i32 6, i32 5, i32 4>
1644/// is lowered to:
1645/// (VSHUF4I_H $v0, $v1, 27)
1646/// where the 27 comes from:
1647/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1648static SDValue
1650 SDValue V1, SDValue V2, SelectionDAG &DAG,
1651 const LoongArchSubtarget &Subtarget) {
1652
1653 unsigned SubVecSize = 4;
1654 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1655 SubVecSize = 2;
1656
1657 int SubMask[4] = {-1, -1, -1, -1};
1658 for (unsigned i = 0; i < SubVecSize; ++i) {
1659 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1660 int M = Mask[j];
1661
1662 // Convert from vector index to 4-element subvector index
1663 // If an index refers to an element outside of the subvector then give up
1664 if (M != -1) {
1665 M -= 4 * (j / SubVecSize);
1666 if (M < 0 || M >= 4)
1667 return SDValue();
1668 }
1669
1670 // If the mask has an undef, replace it with the current index.
1671 // Note that it might still be undef if the current index is also undef
1672 if (SubMask[i] == -1)
1673 SubMask[i] = M;
1674 // Check that non-undef values are the same as in the mask. If they
1675 // aren't then give up
1676 else if (M != -1 && M != SubMask[i])
1677 return SDValue();
1678 }
1679 }
1680
1681 // Calculate the immediate. Replace any remaining undefs with zero
1682 int Imm = 0;
1683 for (int i = SubVecSize - 1; i >= 0; --i) {
1684 int M = SubMask[i];
1685
1686 if (M == -1)
1687 M = 0;
1688
1689 Imm <<= 2;
1690 Imm |= M & 0x3;
1691 }
1692
1693 MVT GRLenVT = Subtarget.getGRLenVT();
1694
1695 // Return vshuf4i.d
1696 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1697 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1698 DAG.getConstant(Imm, DL, GRLenVT));
1699
1700 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1701 DAG.getConstant(Imm, DL, GRLenVT));
1702}
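// Illustrative encoding check (matching the example in the comment above):
// for the sub-mask <3, 2, 1, 0> the loop builds the immediate from the last
// element backwards,
//   Imm = ((((0 << 2 | 1) << 2) | 2) << 2) | 3 = 0b00011011 = 27,
// the same value as 3 + (2 << 2) + (1 << 4) + (0 << 6).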
1703
1704/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1705///
1706/// VPACKEV interleaves the even elements from each vector.
1707///
1708/// It is possible to lower into VPACKEV when the mask consists of two of the
1709/// following forms interleaved:
1710/// <0, 2, 4, ...>
1711/// <n, n+2, n+4, ...>
1712/// where n is the number of elements in the vector.
1713/// For example:
1714/// <0, 0, 2, 2, 4, 4, ...>
1715/// <0, n, 2, n+2, 4, n+4, ...>
1716///
1717/// When undef's appear in the mask they are treated as if they were whatever
1718/// value is necessary in order to fit the above forms.
1720 MVT VT, SDValue V1, SDValue V2,
1721 SelectionDAG &DAG) {
1722
1723 const auto &Begin = Mask.begin();
1724 const auto &End = Mask.end();
1725 SDValue OriV1 = V1, OriV2 = V2;
1726
1727 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1728 V1 = OriV1;
1729 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1730 V1 = OriV2;
1731 else
1732 return SDValue();
1733
1734 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1735 V2 = OriV1;
1736 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1737 V2 = OriV2;
1738 else
1739 return SDValue();
1740
1741 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1742}
1743
1744/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1745///
1746/// VPACKOD interleaves the odd elements from each vector.
1747///
1748/// It is possible to lower into VPACKOD when the mask consists of two of the
1749/// following forms interleaved:
1750/// <1, 3, 5, ...>
1751/// <n+1, n+3, n+5, ...>
1752/// where n is the number of elements in the vector.
1753/// For example:
1754/// <1, 1, 3, 3, 5, 5, ...>
1755/// <1, n+1, 3, n+3, 5, n+5, ...>
1756///
1757/// When undef's appear in the mask they are treated as if they were whatever
1758/// value is necessary in order to fit the above forms.
1760 MVT VT, SDValue V1, SDValue V2,
1761 SelectionDAG &DAG) {
1762
1763 const auto &Begin = Mask.begin();
1764 const auto &End = Mask.end();
1765 SDValue OriV1 = V1, OriV2 = V2;
1766
1767 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1768 V1 = OriV1;
1769 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1770 V1 = OriV2;
1771 else
1772 return SDValue();
1773
1774 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1775 V2 = OriV1;
1776 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1777 V2 = OriV2;
1778 else
1779 return SDValue();
1780
1781 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1782}
1783
1784/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1785///
1786/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1787/// of each vector.
1788///
1789/// It is possible to lower into VILVH when the mask consists of two of the
1790/// following forms interleaved:
1791/// <x, x+1, x+2, ...>
1792/// <n+x, n+x+1, n+x+2, ...>
1793/// where n is the number of elements in the vector and x is half n.
1794/// For example:
1795/// <x, x, x+1, x+1, x+2, x+2, ...>
1796/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1797///
1798/// When undef's appear in the mask they are treated as if they were whatever
1799/// value is necessary in order to fit the above forms.
1801 MVT VT, SDValue V1, SDValue V2,
1802 SelectionDAG &DAG) {
1803
1804 const auto &Begin = Mask.begin();
1805 const auto &End = Mask.end();
1806 unsigned HalfSize = Mask.size() / 2;
1807 SDValue OriV1 = V1, OriV2 = V2;
1808
1809 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1810 V1 = OriV1;
1811 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1812 V1 = OriV2;
1813 else
1814 return SDValue();
1815
1816 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1817 V2 = OriV1;
1818 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1819 1))
1820 V2 = OriV2;
1821 else
1822 return SDValue();
1823
1824 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1825}
1826
1827/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1828///
1829/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1830/// of each vector.
1831///
1832/// It is possible to lower into VILVL when the mask consists of two of the
1833/// following forms interleaved:
1834/// <0, 1, 2, ...>
1835/// <n, n+1, n+2, ...>
1836/// where n is the number of elements in the vector.
1837/// For example:
1838/// <0, 0, 1, 1, 2, 2, ...>
1839/// <0, n, 1, n+1, 2, n+2, ...>
1840///
1841/// When undef's appear in the mask they are treated as if they were whatever
1842/// value is necessary in order to fit the above forms.
1844 MVT VT, SDValue V1, SDValue V2,
1845 SelectionDAG &DAG) {
1846
1847 const auto &Begin = Mask.begin();
1848 const auto &End = Mask.end();
1849 SDValue OriV1 = V1, OriV2 = V2;
1850
1851 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1852 V1 = OriV1;
1853 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1854 V1 = OriV2;
1855 else
1856 return SDValue();
1857
1858 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1859 V2 = OriV1;
1860 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1861 V2 = OriV2;
1862 else
1863 return SDValue();
1864
1865 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1866}
1867
1868/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1869///
1870/// VPICKEV copies the even elements of each vector into the result vector.
1871///
1872/// It is possible to lower into VPICKEV when the mask consists of two of the
1873/// following forms concatenated:
1874/// <0, 2, 4, ...>
1875/// <n, n+2, n+4, ...>
1876/// where n is the number of elements in the vector.
1877/// For example:
1878/// <0, 2, 4, ..., 0, 2, 4, ...>
1879/// <0, 2, 4, ..., n, n+2, n+4, ...>
1880///
1881/// When undef's appear in the mask they are treated as if they were whatever
1882/// value is necessary in order to fit the above forms.
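/// Illustrative example (annotation): for v8i16, the mask
/// <0, 2, 4, 6, 8, 10, 12, 14> takes the even elements of V1 followed by the
/// even elements of V2 and is matched by this routine.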
1883 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1884 MVT VT, SDValue V1, SDValue V2,
1885 SelectionDAG &DAG) {
1886
1887 const auto &Begin = Mask.begin();
1888 const auto &Mid = Mask.begin() + Mask.size() / 2;
1889 const auto &End = Mask.end();
1890 SDValue OriV1 = V1, OriV2 = V2;
1891
1892 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1893 V1 = OriV1;
1894 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1895 V1 = OriV2;
1896 else
1897 return SDValue();
1898
1899 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1900 V2 = OriV1;
1901 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1902 V2 = OriV2;
1903
1904 else
1905 return SDValue();
1906
1907 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1908}
1909
1910/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1911///
1912/// VPICKOD copies the odd elements of each vector into the result vector.
1913///
1914/// It is possible to lower into VPICKOD when the mask consists of two of the
1915/// following forms concatenated:
1916/// <1, 3, 5, ...>
1917/// <n+1, n+3, n+5, ...>
1918/// where n is the number of elements in the vector.
1919/// For example:
1920/// <1, 3, 5, ..., 1, 3, 5, ...>
1921/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1922///
1923/// When undef's appear in the mask they are treated as if they were whatever
1924/// value is necessary in order to fit the above forms.
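/// Illustrative example (annotation): for v8i16, the mask
/// <1, 3, 5, 7, 9, 11, 13, 15> takes the odd elements of V1 followed by the
/// odd elements of V2 and is matched by this routine.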
1925 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1926 MVT VT, SDValue V1, SDValue V2,
1927 SelectionDAG &DAG) {
1928
1929 const auto &Begin = Mask.begin();
1930 const auto &Mid = Mask.begin() + Mask.size() / 2;
1931 const auto &End = Mask.end();
1932 SDValue OriV1 = V1, OriV2 = V2;
1933
1934 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1935 V1 = OriV1;
1936 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1937 V1 = OriV2;
1938 else
1939 return SDValue();
1940
1941 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1942 V2 = OriV1;
1943 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1944 V2 = OriV2;
1945 else
1946 return SDValue();
1947
1948 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1949}
1950
1951/// Lower VECTOR_SHUFFLE into VSHUF.
1952///
1953/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1954/// adding it as an operand to the resulting VSHUF.
1955 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1956 MVT VT, SDValue V1, SDValue V2,
1957 SelectionDAG &DAG,
1958 const LoongArchSubtarget &Subtarget) {
1959
1961 for (auto M : Mask)
1962 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
1963
1964 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1965 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1966
1967 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1968 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1969 // VSHUF concatenates the vectors in a bitwise fashion:
1970 // <0b00, 0b01> + <0b10, 0b11> ->
1971 // 0b0100 + 0b1110 -> 0b01001110
1972 // <0b10, 0b11, 0b00, 0b01>
1973 // We must therefore swap the operands to get the correct result.
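  // Illustrative note (annotation): for a v4i32 shuffle with mask <0, 4, 1, 5>,
  // the mask is emitted unchanged as the BUILD_VECTOR operand, while V1 and V2
  // are passed to the VSHUF node in swapped order (V2 first, then V1) to
  // account for the concatenation order described above.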
1974 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1975}
1976
1977/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1978///
1979/// This routine breaks down the specific type of 128-bit shuffle and
1980/// dispatches to the lowering routines accordingly.
1981 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1982 SDValue V1, SDValue V2, SelectionDAG &DAG,
1983 const LoongArchSubtarget &Subtarget) {
1984 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1985 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1986 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1987 "Vector type is unsupported for lsx!");
1989 "Two operands have different types!");
1990 assert(VT.getVectorNumElements() == Mask.size() &&
1991 "Unexpected mask size for shuffle!");
1992 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1993
1994 APInt KnownUndef, KnownZero;
1995 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1996 APInt Zeroable = KnownUndef | KnownZero;
1997
1998 SDValue Result;
1999 // TODO: Add more comparison patterns.
2000 if (V2.isUndef()) {
2001 if ((Result =
2002 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2003 return Result;
2004 if ((Result =
2005 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2006 return Result;
2007
2008 // TODO: This comment may be enabled in the future to better match the
2009 // pattern for instruction selection.
2010 /* V2 = V1; */
2011 }
2012
2013 // It is recommended not to change the pattern comparison order for better
2014 // performance.
2015 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2016 return Result;
2017 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2018 return Result;
2019 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2020 return Result;
2021 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2022 return Result;
2023 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2024 return Result;
2025 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2026 return Result;
2027 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2028 (Result =
2029 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2030 return Result;
2031 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2032 Zeroable)))
2033 return Result;
2034 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2035 Zeroable)))
2036 return Result;
2037 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2038 Subtarget)))
2039 return Result;
2040 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2041 return NewShuffle;
2042 if ((Result =
2043 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2044 return Result;
2045 return SDValue();
2046}
2047
2048/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2049///
2050 /// It is an XVREPLVEI when the mask is:
2051 /// <x, x, x, ..., x+n, x+n, x+n, ...>
2052 /// where the number of x's equals n and n is half the length of the vector.
2053///
2054/// When undef's appear in the mask they are treated as if they were whatever
2055/// value is necessary in order to fit the above form.
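/// Illustrative example (annotation): for v8i32, the mask
/// <1, 1, 1, 1, 5, 5, 5, 5> (x = 1, n = 4) is matched and lowered with splat
/// index 1.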
2056static SDValue
2057 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2058 SDValue V1, SelectionDAG &DAG,
2059 const LoongArchSubtarget &Subtarget) {
2060 int SplatIndex = -1;
2061 for (const auto &M : Mask) {
2062 if (M != -1) {
2063 SplatIndex = M;
2064 break;
2065 }
2066 }
2067
2068 if (SplatIndex == -1)
2069 return DAG.getUNDEF(VT);
2070
2071 const auto &Begin = Mask.begin();
2072 const auto &End = Mask.end();
2073 int HalfSize = Mask.size() / 2;
2074
2075 if (SplatIndex >= HalfSize)
2076 return SDValue();
2077
2078 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2079 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2080 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2081 0)) {
2082 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2083 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2084 }
2085
2086 return SDValue();
2087}
2088
2089/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2090static SDValue
2091 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2092 SDValue V1, SDValue V2, SelectionDAG &DAG,
2093 const LoongArchSubtarget &Subtarget) {
2094 // When the mask size is less than or equal to 4, lower-cost instructions may
2095 // be used.
2096 if (Mask.size() <= 4)
2097 return SDValue();
2098 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2099}
2100
2101/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2102static SDValue
2103 lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2104 SDValue V1, SelectionDAG &DAG,
2105 const LoongArchSubtarget &Subtarget) {
2106 // Only consider XVPERMI_D.
2107 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2108 return SDValue();
2109
2110 unsigned MaskImm = 0;
2111 for (unsigned i = 0; i < Mask.size(); ++i) {
2112 if (Mask[i] == -1)
2113 continue;
2114 MaskImm |= Mask[i] << (i * 2);
2115 }
2116
2117 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2118 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2119}
2120
2121/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2122 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2123 MVT VT, SDValue V1, SelectionDAG &DAG,
2124 const LoongArchSubtarget &Subtarget) {
2125 // LoongArch LASX only has XVPERM_W.
2126 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2127 return SDValue();
2128
2129 unsigned NumElts = VT.getVectorNumElements();
2130 unsigned HalfSize = NumElts / 2;
2131 bool FrontLo = true, FrontHi = true;
2132 bool BackLo = true, BackHi = true;
2133
2134 auto inRange = [](int val, int low, int high) {
2135 return (val == -1) || (val >= low && val < high);
2136 };
2137
2138 for (unsigned i = 0; i < HalfSize; ++i) {
2139 int Fronti = Mask[i];
2140 int Backi = Mask[i + HalfSize];
2141
2142 FrontLo &= inRange(Fronti, 0, HalfSize);
2143 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2144 BackLo &= inRange(Backi, 0, HalfSize);
2145 BackHi &= inRange(Backi, HalfSize, NumElts);
2146 }
2147
2148 // If both the lower and upper 128-bit parts access only one half of the
2149 // vector (either lower or upper), avoid using xvperm.w. The latency of
2150 // xvperm.w (3) is higher than that of xvshuf (1) plus xvori (1).
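  // Illustrative note (annotation): a v8i32 mask such as <7, 6, 5, 4, 3, 2, 1, 0>
  // is rejected here because each result half reads only from one half of the
  // source, which the cheaper in-lane lowerings can handle, whereas a mask like
  // <0, 4, 1, 5, 2, 6, 3, 7> mixes both halves in each result half and proceeds
  // to use XVPERM below.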
2151 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2152 return SDValue();
2153
2155 MVT GRLenVT = Subtarget.getGRLenVT();
2156 for (unsigned i = 0; i < NumElts; ++i)
2157 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2158 : DAG.getConstant(Mask[i], DL, GRLenVT));
2159 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2160
2161 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2162}
2163
2164/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2165 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2166 MVT VT, SDValue V1, SDValue V2,
2167 SelectionDAG &DAG) {
2168 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2169}
2170
2171/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2172 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2173 MVT VT, SDValue V1, SDValue V2,
2174 SelectionDAG &DAG) {
2175 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2176}
2177
2178/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2179 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2180 MVT VT, SDValue V1, SDValue V2,
2181 SelectionDAG &DAG) {
2182
2183 const auto &Begin = Mask.begin();
2184 const auto &End = Mask.end();
2185 unsigned HalfSize = Mask.size() / 2;
2186 unsigned LeftSize = HalfSize / 2;
2187 SDValue OriV1 = V1, OriV2 = V2;
2188
2189 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2190 1) &&
2191 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2192 V1 = OriV1;
2193 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2194 Mask.size() + HalfSize - LeftSize, 1) &&
2195 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2196 Mask.size() + HalfSize + LeftSize, 1))
2197 V1 = OriV2;
2198 else
2199 return SDValue();
2200
2201 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2202 1) &&
2203 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2204 1))
2205 V2 = OriV1;
2206 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2207 Mask.size() + HalfSize - LeftSize, 1) &&
2208 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2209 Mask.size() + HalfSize + LeftSize, 1))
2210 V2 = OriV2;
2211 else
2212 return SDValue();
2213
2214 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2215}
2216
2217/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2218 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2219 MVT VT, SDValue V1, SDValue V2,
2220 SelectionDAG &DAG) {
2221
2222 const auto &Begin = Mask.begin();
2223 const auto &End = Mask.end();
2224 unsigned HalfSize = Mask.size() / 2;
2225 SDValue OriV1 = V1, OriV2 = V2;
2226
2227 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2228 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2229 V1 = OriV1;
2230 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2231 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2232 Mask.size() + HalfSize, 1))
2233 V1 = OriV2;
2234 else
2235 return SDValue();
2236
2237 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2238 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2239 V2 = OriV1;
2240 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2241 1) &&
2242 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2243 Mask.size() + HalfSize, 1))
2244 V2 = OriV2;
2245 else
2246 return SDValue();
2247
2248 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2249}
2250
2251/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2252 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2253 MVT VT, SDValue V1, SDValue V2,
2254 SelectionDAG &DAG) {
2255
2256 const auto &Begin = Mask.begin();
2257 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2258 const auto &Mid = Mask.begin() + Mask.size() / 2;
2259 const auto &RightMid = Mask.end() - Mask.size() / 4;
2260 const auto &End = Mask.end();
2261 unsigned HalfSize = Mask.size() / 2;
2262 SDValue OriV1 = V1, OriV2 = V2;
2263
2264 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2265 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2266 V1 = OriV1;
2267 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2268 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2269 V1 = OriV2;
2270 else
2271 return SDValue();
2272
2273 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2274 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2275 V2 = OriV1;
2276 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2277 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2278 V2 = OriV2;
2279
2280 else
2281 return SDValue();
2282
2283 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2284}
2285
2286/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2287 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2288 MVT VT, SDValue V1, SDValue V2,
2289 SelectionDAG &DAG) {
2290
2291 const auto &Begin = Mask.begin();
2292 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2293 const auto &Mid = Mask.begin() + Mask.size() / 2;
2294 const auto &RightMid = Mask.end() - Mask.size() / 4;
2295 const auto &End = Mask.end();
2296 unsigned HalfSize = Mask.size() / 2;
2297 SDValue OriV1 = V1, OriV2 = V2;
2298
2299 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2300 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2301 V1 = OriV1;
2302 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2303 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2304 2))
2305 V1 = OriV2;
2306 else
2307 return SDValue();
2308
2309 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2310 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2311 V2 = OriV1;
2312 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2313 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2314 2))
2315 V2 = OriV2;
2316 else
2317 return SDValue();
2318
2319 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2320}
2321
2322/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2323static SDValue
2324 lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2325 SDValue V1, SDValue V2, SelectionDAG &DAG,
2326 const LoongArchSubtarget &Subtarget) {
2327 // LoongArch LASX only supports xvinsve0.{w/d}.
2328 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2329 VT != MVT::v4f64)
2330 return SDValue();
2331
2332 MVT GRLenVT = Subtarget.getGRLenVT();
2333 int MaskSize = Mask.size();
2334 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2335
2336 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2337 // all other elements are either 'Base + i' or undef (-1). On success, return
2338 // the index of the replaced element. Otherwise, just return -1.
2339 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2340 int Idx = -1;
2341 for (int i = 0; i < MaskSize; ++i) {
2342 if (Mask[i] == Base + i || Mask[i] == -1)
2343 continue;
2344 if (Mask[i] != Replaced)
2345 return -1;
2346 if (Idx == -1)
2347 Idx = i;
2348 else
2349 return -1;
2350 }
2351 return Idx;
2352 };
2353
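  // Illustrative example (annotation): for v8i32 with Mask = <0, 1, 2, 8, 4, 5, 6, 7>,
  // element 3 of V1 is replaced by the lowest element of V2, so Case 1 below
  // returns XVINSVE0 with index 3.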
2354 // Case 1: the lowest element of V2 replaces one element in V1.
2355 int Idx = checkReplaceOne(0, MaskSize);
2356 if (Idx != -1)
2357 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2358 DAG.getConstant(Idx, DL, GRLenVT));
2359
2360 // Case 2: the lowest element of V1 replaces one element in V2.
2361 Idx = checkReplaceOne(MaskSize, 0);
2362 if (Idx != -1)
2363 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2364 DAG.getConstant(Idx, DL, GRLenVT));
2365
2366 return SDValue();
2367}
2368
2369/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2370 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2371 MVT VT, SDValue V1, SDValue V2,
2372 SelectionDAG &DAG) {
2373
2374 int MaskSize = Mask.size();
2375 int HalfSize = Mask.size() / 2;
2376 const auto &Begin = Mask.begin();
2377 const auto &Mid = Mask.begin() + HalfSize;
2378 const auto &End = Mask.end();
2379
2380 // VECTOR_SHUFFLE concatenates the vectors:
2381 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2382 // shuffling ->
2383 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2384 //
2385 // XVSHUF concatenates the vectors:
2386 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2387 // shuffling ->
2388 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
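  // Illustrative example (annotation): for a v8i32 shuffle with
  // Mask = <0, 8, 1, 9, 4, 12, 5, 13>, both halves remap to <0, 4, 1, 5>, so the
  // VSHUF index vector built below becomes <0, 4, 1, 5, 0, 4, 1, 5>.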
2389 SmallVector<SDValue, 8> MaskAlloc;
2390 for (auto it = Begin; it < Mid; it++) {
2391 if (*it < 0) // UNDEF
2392 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2393 else if ((*it >= 0 && *it < HalfSize) ||
2394 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2395 int M = *it < HalfSize ? *it : *it - HalfSize;
2396 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2397 } else
2398 return SDValue();
2399 }
2400 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2401
2402 for (auto it = Mid; it < End; it++) {
2403 if (*it < 0) // UNDEF
2404 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2405 else if ((*it >= HalfSize && *it < MaskSize) ||
2406 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2407 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2408 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2409 } else
2410 return SDValue();
2411 }
2412 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2413
2414 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2415 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2416 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2417}
2418
2419/// Shuffle vectors by lane to generate more optimized instructions.
2420/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2421///
2422 /// Therefore, except for the following four cases, all other cases are regarded
2423 /// as cross-lane shuffles, where optimization is relatively limited.
2424 ///
2425 /// - Shuffle high, low lanes of two input vectors
2426 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2427 /// - Shuffle low, high lanes of two input vectors
2428 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2429 /// - Shuffle low, low lanes of two input vectors
2430 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2431 /// - Shuffle high, high lanes of two input vectors
2432 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2433///
2434/// The first case is the closest to LoongArch instructions and the other
2435/// cases need to be converted to it for processing.
2436///
2437/// This function will return true for the last three cases above and will
2438/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2439/// cross-lane shuffle cases.
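///
/// Note (annotation): the conversions below use xvpermi.d immediates read as
/// four 2-bit indices: 0b01001110 selects <2, 3, 0, 1> (swaps the two 128-bit
/// halves), 0b11101110 selects <2, 3, 2, 3> (duplicates the upper half), and
/// 0b01000100 selects <0, 1, 0, 1> (duplicates the lower half).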
2440 static bool canonicalizeShuffleVectorByLane(
2441 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2442 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2443
2444 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2445
2446 int MaskSize = Mask.size();
2447 int HalfSize = Mask.size() / 2;
2448 MVT GRLenVT = Subtarget.getGRLenVT();
2449
2450 HalfMaskType preMask = None, postMask = None;
2451
2452 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2453 return M < 0 || (M >= 0 && M < HalfSize) ||
2454 (M >= MaskSize && M < MaskSize + HalfSize);
2455 }))
2456 preMask = HighLaneTy;
2457 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2458 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2459 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2460 }))
2461 preMask = LowLaneTy;
2462
2463 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2464 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2465 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2466 }))
2467 postMask = LowLaneTy;
2468 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2469 return M < 0 || (M >= 0 && M < HalfSize) ||
2470 (M >= MaskSize && M < MaskSize + HalfSize);
2471 }))
2472 postMask = HighLaneTy;
2473
2474 // The first half of the mask is high-lane type and the second half is
2475 // low-lane type, which is closest to the LoongArch instructions.
2476 //
2477 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2478 // to the lower 128 bits of the vector register, and the low lane of the mask
2479 // corresponds to the higher 128 bits of the vector register.
2480 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2481 return false;
2482 }
2483 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2484 V1 = DAG.getBitcast(MVT::v4i64, V1);
2485 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2486 DAG.getConstant(0b01001110, DL, GRLenVT));
2487 V1 = DAG.getBitcast(VT, V1);
2488
2489 if (!V2.isUndef()) {
2490 V2 = DAG.getBitcast(MVT::v4i64, V2);
2491 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2492 DAG.getConstant(0b01001110, DL, GRLenVT));
2493 V2 = DAG.getBitcast(VT, V2);
2494 }
2495
2496 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2497 *it = *it < 0 ? *it : *it - HalfSize;
2498 }
2499 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2500 *it = *it < 0 ? *it : *it + HalfSize;
2501 }
2502 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2503 V1 = DAG.getBitcast(MVT::v4i64, V1);
2504 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2505 DAG.getConstant(0b11101110, DL, GRLenVT));
2506 V1 = DAG.getBitcast(VT, V1);
2507
2508 if (!V2.isUndef()) {
2509 V2 = DAG.getBitcast(MVT::v4i64, V2);
2510 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2511 DAG.getConstant(0b11101110, DL, GRLenVT));
2512 V2 = DAG.getBitcast(VT, V2);
2513 }
2514
2515 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2516 *it = *it < 0 ? *it : *it - HalfSize;
2517 }
2518 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2519 V1 = DAG.getBitcast(MVT::v4i64, V1);
2520 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2521 DAG.getConstant(0b01000100, DL, GRLenVT));
2522 V1 = DAG.getBitcast(VT, V1);
2523
2524 if (!V2.isUndef()) {
2525 V2 = DAG.getBitcast(MVT::v4i64, V2);
2526 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2527 DAG.getConstant(0b01000100, DL, GRLenVT));
2528 V2 = DAG.getBitcast(VT, V2);
2529 }
2530
2531 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2532 *it = *it < 0 ? *it : *it + HalfSize;
2533 }
2534 } else { // cross-lane
2535 return false;
2536 }
2537
2538 return true;
2539}
2540
2541/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2542/// Only for 256-bit vector.
2543///
2544/// For example:
2545/// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
2546/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2547/// is lowerded to:
2548/// (XVPERMI $xr2, $xr0, 78)
2549/// (XVSHUF $xr1, $xr2, $xr0)
2550/// (XVORI $xr0, $xr1, 0)
2551 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2552 ArrayRef<int> Mask,
2553 MVT VT, SDValue V1,
2554 SDValue V2,
2555 SelectionDAG &DAG) {
2556 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2557 int Size = Mask.size();
2558 int LaneSize = Size / 2;
2559
2560 bool LaneCrossing[2] = {false, false};
2561 for (int i = 0; i < Size; ++i)
2562 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2563 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2564
2565 // Bail out unless at least one lane crossing is involved.
2566 if (!LaneCrossing[0] && !LaneCrossing[1])
2567 return SDValue();
2568
2569 SmallVector<int> InLaneMask;
2570 InLaneMask.assign(Mask.begin(), Mask.end());
2571 for (int i = 0; i < Size; ++i) {
2572 int &M = InLaneMask[i];
2573 if (M < 0)
2574 continue;
2575 if (((M % Size) / LaneSize) != (i / LaneSize))
2576 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2577 }
2578
2579 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2580 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2581 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2582 Flipped = DAG.getBitcast(VT, Flipped);
2583 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2584}
2585
2586/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2587///
2588/// This routine breaks down the specific type of 256-bit shuffle and
2589/// dispatches to the lowering routines accordingly.
2590 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2591 SDValue V1, SDValue V2, SelectionDAG &DAG,
2592 const LoongArchSubtarget &Subtarget) {
2593 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2594 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2595 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2596 "Vector type is unsupported for lasx!");
2598 "Two operands have different types!");
2599 assert(VT.getVectorNumElements() == Mask.size() &&
2600 "Unexpected mask size for shuffle!");
2601 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2602 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2603
2604 APInt KnownUndef, KnownZero;
2605 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2606 APInt Zeroable = KnownUndef | KnownZero;
2607
2608 SDValue Result;
2609 // TODO: Add more comparison patterns.
2610 if (V2.isUndef()) {
2611 if ((Result =
2612 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2613 return Result;
2614 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2615 Subtarget)))
2616 return Result;
2617 if ((Result =
2618 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2619 return Result;
2620 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2621 return Result;
2622
2623 // TODO: This comment may be enabled in the future to better match the
2624 // pattern for instruction selection.
2625 /* V2 = V1; */
2626 }
2627
2628 // It is recommended not to change the pattern comparison order for better
2629 // performance.
2630 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2631 return Result;
2632 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2633 return Result;
2634 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2635 return Result;
2636 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2637 return Result;
2638 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2639 return Result;
2640 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2641 return Result;
2642 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2643 Zeroable)))
2644 return Result;
2645 if ((Result =
2646 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2647 return Result;
2648 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2649 Subtarget)))
2650 return Result;
2651
2652 // Canonicalize non-cross-lane shuffle vectors.
2653 SmallVector<int> NewMask(Mask);
2654 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2655 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2656
2657 // FIXME: Handling the remaining cases earlier can degrade performance
2658 // in some situations. Further analysis is required to enable more
2659 // effective optimizations.
2660 if (V2.isUndef()) {
2661 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2662 V1, V2, DAG)))
2663 return Result;
2664 }
2665
2666 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2667 return NewShuffle;
2668 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2669 return Result;
2670
2671 return SDValue();
2672}
2673
2674SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2675 SelectionDAG &DAG) const {
2676 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2677 ArrayRef<int> OrigMask = SVOp->getMask();
2678 SDValue V1 = Op.getOperand(0);
2679 SDValue V2 = Op.getOperand(1);
2680 MVT VT = Op.getSimpleValueType();
2681 int NumElements = VT.getVectorNumElements();
2682 SDLoc DL(Op);
2683
2684 bool V1IsUndef = V1.isUndef();
2685 bool V2IsUndef = V2.isUndef();
2686 if (V1IsUndef && V2IsUndef)
2687 return DAG.getUNDEF(VT);
2688
2689 // When we create a shuffle node we put the UNDEF node to second operand,
2690 // but in some cases the first operand may be transformed to UNDEF.
2691 // In this case we should just commute the node.
2692 if (V1IsUndef)
2693 return DAG.getCommutedVectorShuffle(*SVOp);
2694
2695 // Check for non-undef masks pointing at an undef vector and make the masks
2696 // undef as well. This makes it easier to match the shuffle based solely on
2697 // the mask.
2698 if (V2IsUndef &&
2699 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2700 SmallVector<int, 8> NewMask(OrigMask);
2701 for (int &M : NewMask)
2702 if (M >= NumElements)
2703 M = -1;
2704 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2705 }
2706
2707 // Check for illegal shuffle mask element index values.
2708 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2709 (void)MaskUpperLimit;
2710 assert(llvm::all_of(OrigMask,
2711 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2712 "Out of bounds shuffle index");
2713
2714 // For each vector width, delegate to a specialized lowering routine.
2715 if (VT.is128BitVector())
2716 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2717
2718 if (VT.is256BitVector())
2719 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2720
2721 return SDValue();
2722}
2723
2724SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2725 SelectionDAG &DAG) const {
2726 // Custom lower to ensure the libcall return is passed in an FPR on hard
2727 // float ABIs.
2728 SDLoc DL(Op);
2729 MakeLibCallOptions CallOptions;
2730 SDValue Op0 = Op.getOperand(0);
2731 SDValue Chain = SDValue();
2732 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2733 SDValue Res;
2734 std::tie(Res, Chain) =
2735 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2736 if (Subtarget.is64Bit())
2737 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2738 return DAG.getBitcast(MVT::i32, Res);
2739}
2740
2741SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2742 SelectionDAG &DAG) const {
2743 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2744 // float ABIs.
2745 SDLoc DL(Op);
2746 MakeLibCallOptions CallOptions;
2747 SDValue Op0 = Op.getOperand(0);
2748 SDValue Chain = SDValue();
2749 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2750 DL, MVT::f32, Op0)
2751 : DAG.getBitcast(MVT::f32, Op0);
2752 SDValue Res;
2753 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2754 CallOptions, DL, Chain);
2755 return Res;
2756}
2757
2758SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2759 SelectionDAG &DAG) const {
2760 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2761 SDLoc DL(Op);
2762 MakeLibCallOptions CallOptions;
2763 RTLIB::Libcall LC =
2764 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2765 SDValue Res =
2766 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2767 if (Subtarget.is64Bit())
2768 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2769 return DAG.getBitcast(MVT::i32, Res);
2770}
2771
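// Note (annotation): extending bf16 to f32 amounts to placing the 16-bit value
// in the high half of the f32 bit pattern, which is why the lowering below
// shifts the integer input left by 16 before moving it into an FPR.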
2772SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2773 SelectionDAG &DAG) const {
2774 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2775 MVT VT = Op.getSimpleValueType();
2776 SDLoc DL(Op);
2777 Op = DAG.getNode(
2778 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2779 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2780 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2781 DL, MVT::f32, Op)
2782 : DAG.getBitcast(MVT::f32, Op);
2783 if (VT != MVT::f32)
2784 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2785 return Res;
2786}
2787
2788// Lower BUILD_VECTOR as broadcast load (if possible).
2789// For example:
2790// %a = load i8, ptr %ptr
2791// %b = build_vector %a, %a, %a, %a
2792 // is lowered to:
2793// (VLDREPL_B $a0, 0)
2794 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2795 const SDLoc &DL,
2796 SelectionDAG &DAG) {
2797 MVT VT = BVOp->getSimpleValueType(0);
2798 int NumOps = BVOp->getNumOperands();
2799
2800 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2801 "Unsupported vector type for broadcast.");
2802
2803 SDValue IdentitySrc;
2804 bool IsIdentity = true;
2805
2806 for (int i = 0; i != NumOps; i++) {
2807 SDValue Op = BVOp->getOperand(i);
2808 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2809 IsIdentity = false;
2810 break;
2811 }
2812 IdentitySrc = BVOp->getOperand(0);
2813 }
2814
2815 // Make sure that this load is valid and only has one user.
2816 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2817 return SDValue();
2818
2819 auto *LN = cast<LoadSDNode>(IdentitySrc);
2820 auto ExtType = LN->getExtensionType();
2821
2822 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2823 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2824 SDVTList Tys =
2825 LN->isIndexed()
2826 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2827 : DAG.getVTList(VT, MVT::Other);
2828 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2829 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2830 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2831 return BCast;
2832 }
2833 return SDValue();
2834}
2835
2836// Sequentially insert elements from Ops into Vector, from low to high indices.
2837// Note: Ops can have fewer elements than Vector.
2838 static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
2839 const LoongArchSubtarget &Subtarget, SDValue &Vector,
2840 EVT ResTy) {
2841 assert(Ops.size() <= ResTy.getVectorNumElements());
2842
2843 SDValue Op0 = Ops[0];
2844 if (!Op0.isUndef())
2845 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2846 for (unsigned i = 1; i < Ops.size(); ++i) {
2847 SDValue Opi = Ops[i];
2848 if (Opi.isUndef())
2849 continue;
2850 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2851 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2852 }
2853}
2854
2855// Build a ResTy subvector from Node, taking NumElts elements starting at index
2856// 'first'.
2857 static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
2858 SelectionDAG &DAG, SDLoc DL,
2859 const LoongArchSubtarget &Subtarget,
2860 EVT ResTy, unsigned first) {
2861 unsigned NumElts = ResTy.getVectorNumElements();
2862
2863 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
2864
2865 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
2866 Node->op_begin() + first + NumElts);
2867 SDValue Vector = DAG.getUNDEF(ResTy);
2868 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
2869 return Vector;
2870}
2871
2872SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2873 SelectionDAG &DAG) const {
2874 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2875 MVT VT = Node->getSimpleValueType(0);
2876 EVT ResTy = Op->getValueType(0);
2877 unsigned NumElts = ResTy.getVectorNumElements();
2878 SDLoc DL(Op);
2879 APInt SplatValue, SplatUndef;
2880 unsigned SplatBitSize;
2881 bool HasAnyUndefs;
2882 bool IsConstant = false;
2883 bool UseSameConstant = true;
2884 SDValue ConstantValue;
2885 bool Is128Vec = ResTy.is128BitVector();
2886 bool Is256Vec = ResTy.is256BitVector();
2887
2888 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2889 (!Subtarget.hasExtLASX() || !Is256Vec))
2890 return SDValue();
2891
2892 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2893 return Result;
2894
2895 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2896 /*MinSplatBits=*/8) &&
2897 SplatBitSize <= 64) {
2898 // We can only cope with 8, 16, 32, or 64-bit elements.
2899 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2900 SplatBitSize != 64)
2901 return SDValue();
2902
2903 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2904 // We can only handle 64-bit elements that are within
2905 // the signed 10-bit range or match vldi patterns on 32-bit targets.
2906 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2907 if (!SplatValue.isSignedIntN(10) &&
2908 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
2909 return SDValue();
2910 if ((Is128Vec && ResTy == MVT::v4i32) ||
2911 (Is256Vec && ResTy == MVT::v8i32))
2912 return Op;
2913 }
2914
2915 EVT ViaVecTy;
2916
2917 switch (SplatBitSize) {
2918 default:
2919 return SDValue();
2920 case 8:
2921 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2922 break;
2923 case 16:
2924 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2925 break;
2926 case 32:
2927 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2928 break;
2929 case 64:
2930 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2931 break;
2932 }
2933
2934 // SelectionDAG::getConstant will promote SplatValue appropriately.
2935 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2936
2937 // Bitcast to the type we originally wanted.
2938 if (ViaVecTy != ResTy)
2939 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2940
2941 return Result;
2942 }
2943
2944 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2945 return Op;
2946
2947 for (unsigned i = 0; i < NumElts; ++i) {
2948 SDValue Opi = Node->getOperand(i);
2949 if (isIntOrFPConstant(Opi)) {
2950 IsConstant = true;
2951 if (!ConstantValue.getNode())
2952 ConstantValue = Opi;
2953 else if (ConstantValue != Opi)
2954 UseSameConstant = false;
2955 }
2956 }
2957
2958 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2959 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2960 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2961 for (unsigned i = 0; i < NumElts; ++i) {
2962 SDValue Opi = Node->getOperand(i);
2963 if (!isIntOrFPConstant(Opi))
2964 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2965 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2966 }
2967 return Result;
2968 }
2969
2970 if (!IsConstant) {
2971 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
2972 // the sub-sequence of the vector and then broadcast the sub-sequence.
2973 //
2974 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
2975 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
2976 // generates worse code in some cases. This could be further optimized
2977 // with more consideration.
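    // Illustrative example (annotation): a v8i32 build_vector
    // <a, b, a, b, a, b, a, b> has the repeated sequence {a, b}; the two
    // elements are inserted once into a 128-bit subvector and then broadcast,
    // instead of performing eight element insertions.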
2979 BitVector UndefElements;
2980 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
2981 UndefElements.count() == 0) {
2982 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
2983 // because the high part can be simply treated as undef.
2984 SDValue Vector = DAG.getUNDEF(ResTy);
2985 EVT FillTy = Is256Vec
2986 ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
2987 : ResTy;
2988 SDValue FillVec =
2989 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
2990
2991 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
2992
2993 unsigned SeqLen = Sequence.size();
2994 unsigned SplatLen = NumElts / SeqLen;
2995 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
2996 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
2997
2998 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
2999 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3000 if (SplatEltTy == MVT::i128)
3001 SplatTy = MVT::v4i64;
3002
3003 SDValue SplatVec;
3004 SDValue SrcVec = DAG.getBitcast(
3005 SplatTy,
3006 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3007 if (Is256Vec) {
3008 SplatVec =
3009 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3010 : LoongArchISD::XVREPLVE0,
3011 DL, SplatTy, SrcVec);
3012 } else {
3013 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3014 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3015 }
3016
3017 return DAG.getBitcast(ResTy, SplatVec);
3018 }
3019
3020 // Use INSERT_VECTOR_ELT operations rather than expanding to stores, because
3021 // going through memory operations is much slower.
3022 //
3023 // For 256-bit vectors, normally split into two halves and concatenate.
3024 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3025 // one non-undef element, skip splitting to avoid a worse result.
3026 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3027 ResTy == MVT::v4f64) {
3028 unsigned NonUndefCount = 0;
3029 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3030 if (!Node->getOperand(i).isUndef()) {
3031 ++NonUndefCount;
3032 if (NonUndefCount > 1)
3033 break;
3034 }
3035 }
3036 if (NonUndefCount == 1)
3037 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3038 }
3039
3040 EVT VecTy =
3041 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3042 SDValue Vector =
3043 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3044
3045 if (Is128Vec)
3046 return Vector;
3047
3048 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3049 VecTy, NumElts / 2);
3050
3051 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3052 }
3053
3054 return SDValue();
3055}
3056
3057SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3058 SelectionDAG &DAG) const {
3059 SDLoc DL(Op);
3060 MVT ResVT = Op.getSimpleValueType();
3061 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3062
3063 unsigned NumOperands = Op.getNumOperands();
3064 unsigned NumFreezeUndef = 0;
3065 unsigned NumZero = 0;
3066 unsigned NumNonZero = 0;
3067 unsigned NonZeros = 0;
3068 SmallSet<SDValue, 4> Undefs;
3069 for (unsigned i = 0; i != NumOperands; ++i) {
3070 SDValue SubVec = Op.getOperand(i);
3071 if (SubVec.isUndef())
3072 continue;
3073 if (ISD::isFreezeUndef(SubVec.getNode())) {
3074 // If the freeze(undef) has multiple uses then we must fold to zero.
3075 if (SubVec.hasOneUse()) {
3076 ++NumFreezeUndef;
3077 } else {
3078 ++NumZero;
3079 Undefs.insert(SubVec);
3080 }
3081 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3082 ++NumZero;
3083 else {
3084 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3085 NonZeros |= 1 << i;
3086 ++NumNonZero;
3087 }
3088 }
3089
3090 // If we have more than 2 non-zeros, build each half separately.
3091 if (NumNonZero > 2) {
3092 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3093 ArrayRef<SDUse> Ops = Op->ops();
3094 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3095 Ops.slice(0, NumOperands / 2));
3096 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3097 Ops.slice(NumOperands / 2));
3098 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3099 }
3100
3101 // Otherwise, build it up through insert_subvectors.
3102 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3103 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3104 : DAG.getUNDEF(ResVT));
3105
3106 // Replace Undef operands with ZeroVector.
3107 for (SDValue U : Undefs)
3108 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3109
3110 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3111 unsigned NumSubElems = SubVT.getVectorNumElements();
3112 for (unsigned i = 0; i != NumOperands; ++i) {
3113 if ((NonZeros & (1 << i)) == 0)
3114 continue;
3115
3116 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3117 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3118 }
3119
3120 return Vec;
3121}
3122
3123SDValue
3124LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3125 SelectionDAG &DAG) const {
3126 MVT EltVT = Op.getSimpleValueType();
3127 SDValue Vec = Op->getOperand(0);
3128 EVT VecTy = Vec->getValueType(0);
3129 SDValue Idx = Op->getOperand(1);
3130 SDLoc DL(Op);
3131 MVT GRLenVT = Subtarget.getGRLenVT();
3132
3133 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3134
3135 if (isa<ConstantSDNode>(Idx))
3136 return Op;
3137
3138 switch (VecTy.getSimpleVT().SimpleTy) {
3139 default:
3140 llvm_unreachable("Unexpected type");
3141 case MVT::v32i8:
3142 case MVT::v16i16:
3143 case MVT::v4i64:
3144 case MVT::v4f64: {
3145 // Extract the high-half subvector and place it in the low half of a new
3146 // vector. It doesn't matter what the high half of the new vector is.
3147 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3148 SDValue VecHi =
3149 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3150 SDValue TmpVec =
3151 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3152 VecHi, DAG.getConstant(0, DL, GRLenVT));
3153
3154 // Shuffle the original Vec and TmpVec using MaskVec, whose lowest element
3155 // is Idx; the rest do not matter. ResVec[0] will hold the
3156 // desired element.
3157 SDValue IdxCp =
3158 Subtarget.is64Bit()
3159 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3160 : DAG.getBitcast(MVT::f32, Idx);
3161 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3162 SDValue MaskVec =
3163 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3164 SDValue ResVec =
3165 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3166
3167 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3168 DAG.getConstant(0, DL, GRLenVT));
3169 }
3170 case MVT::v8i32:
3171 case MVT::v8f32: {
3172 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3173 SDValue SplatValue =
3174 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3175
3176 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3177 DAG.getConstant(0, DL, GRLenVT));
3178 }
3179 }
3180}
3181
3182SDValue
3183LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3184 SelectionDAG &DAG) const {
3185 MVT VT = Op.getSimpleValueType();
3186 MVT EltVT = VT.getVectorElementType();
3187 unsigned NumElts = VT.getVectorNumElements();
3188 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3189 SDLoc DL(Op);
3190 SDValue Op0 = Op.getOperand(0);
3191 SDValue Op1 = Op.getOperand(1);
3192 SDValue Op2 = Op.getOperand(2);
3193
3194 if (isa<ConstantSDNode>(Op2))
3195 return Op;
3196
3197 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3198 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3199
3200 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3201 return SDValue();
3202
3203 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3204 SmallVector<SDValue, 32> RawIndices;
3205 SDValue SplatIdx;
3206 SDValue Indices;
3207
3208 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3209 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3210 for (unsigned i = 0; i < NumElts; ++i) {
3211 RawIndices.push_back(Op2);
3212 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3213 }
3214 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3215 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3216
3217 RawIndices.clear();
3218 for (unsigned i = 0; i < NumElts; ++i) {
3219 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3220 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3221 }
3222 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3223 Indices = DAG.getBitcast(IdxVTy, Indices);
3224 } else {
3225 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3226
3227 for (unsigned i = 0; i < NumElts; ++i)
3228 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3229 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3230 }
3231
3232 // insert vec, elt, idx
3233 // =>
3234 // select (splatidx == {0,1,2...}) ? splatelt : vec
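  // Illustrative example (annotation): for a v4i32 insert with a non-constant
  // index, splatidx = <idx, idx, idx, idx> is compared against <0, 1, 2, 3>;
  // the resulting mask is true only in lane idx, so the VSELECT picks splatelt
  // there and the original vector elsewhere.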
3235 SDValue SelectCC =
3236 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3237 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3238}
3239
3240SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3241 SelectionDAG &DAG) const {
3242 SDLoc DL(Op);
3243 SyncScope::ID FenceSSID =
3244 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3245
3246 // singlethread fences only synchronize with signal handlers on the same
3247 // thread and thus only need to preserve instruction order, not actually
3248 // enforce memory ordering.
3249 if (FenceSSID == SyncScope::SingleThread)
3250 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3251 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3252
3253 return Op;
3254}
3255
3256SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3257 SelectionDAG &DAG) const {
3258
3259 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3260 DAG.getContext()->emitError(
3261 "On LA64, only 64-bit registers can be written.");
3262 return Op.getOperand(0);
3263 }
3264
3265 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3266 DAG.getContext()->emitError(
3267 "On LA32, only 32-bit registers can be written.");
3268 return Op.getOperand(0);
3269 }
3270
3271 return Op;
3272}
3273
3274SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3275 SelectionDAG &DAG) const {
3276 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3277 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3278 "be a constant integer");
3279 return SDValue();
3280 }
3281
3282 MachineFunction &MF = DAG.getMachineFunction();
3283 MF.getFrameInfo().setFrameAddressIsTaken(true);
3284 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3285 EVT VT = Op.getValueType();
3286 SDLoc DL(Op);
3287 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3288 unsigned Depth = Op.getConstantOperandVal(0);
3289 int GRLenInBytes = Subtarget.getGRLen() / 8;
3290
3291 while (Depth--) {
3292 int Offset = -(GRLenInBytes * 2);
3293 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3294 DAG.getSignedConstant(Offset, DL, VT));
3295 FrameAddr =
3296 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3297 }
3298 return FrameAddr;
3299}
3300
3301SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3302 SelectionDAG &DAG) const {
3303 // Currently we only support lowering the return address for the current frame.
3304 if (Op.getConstantOperandVal(0) != 0) {
3305 DAG.getContext()->emitError(
3306 "return address can only be determined for the current frame");
3307 return SDValue();
3308 }
3309
3310 MachineFunction &MF = DAG.getMachineFunction();
3311 MF.getFrameInfo().setReturnAddressIsTaken(true);
3312 MVT GRLenVT = Subtarget.getGRLenVT();
3313
3314 // Return the value of the return address register, marking it an implicit
3315 // live-in.
3316 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3317 getRegClassFor(GRLenVT));
3318 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3319}
3320
3321SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3322 SelectionDAG &DAG) const {
3323 MachineFunction &MF = DAG.getMachineFunction();
3324 auto Size = Subtarget.getGRLen() / 8;
3325 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3326 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3327}
3328
3329SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3330 SelectionDAG &DAG) const {
3331 MachineFunction &MF = DAG.getMachineFunction();
3332 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3333
3334 SDLoc DL(Op);
3335 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3336 getPointerTy(MF.getDataLayout()));
3337
3338 // vastart just stores the address of the VarArgsFrameIndex slot into the
3339 // memory location argument.
3340 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3341 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3342 MachinePointerInfo(SV));
3343}
3344
3345SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3346 SelectionDAG &DAG) const {
3347 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3348 !Subtarget.hasBasicD() && "unexpected target features");
3349
3350 SDLoc DL(Op);
3351 SDValue Op0 = Op.getOperand(0);
3352 if (Op0->getOpcode() == ISD::AND) {
3353 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3354 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3355 return Op;
3356 }
3357
3358 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3359 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3360 Op0.getConstantOperandVal(2) == UINT64_C(0))
3361 return Op;
3362
3363 if (Op0.getOpcode() == ISD::AssertZext &&
3364 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3365 return Op;
3366
3367 EVT OpVT = Op0.getValueType();
3368 EVT RetVT = Op.getValueType();
3369 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3370 MakeLibCallOptions CallOptions;
3371 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3372 SDValue Chain = SDValue();
3373 SDValue Result;
3374 std::tie(Result, Chain) =
3375 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3376 return Result;
3377}
3378
3379SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3380 SelectionDAG &DAG) const {
3381 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3382 !Subtarget.hasBasicD() && "unexpected target features");
3383
3384 SDLoc DL(Op);
3385 SDValue Op0 = Op.getOperand(0);
3386
3387 if ((Op0.getOpcode() == ISD::AssertSext ||
3388 Op0.getOpcode() == ISD::AssertZext) &&
3389 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3390 return Op;
3391
3392 EVT OpVT = Op0.getValueType();
3393 EVT RetVT = Op.getValueType();
3394 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3395 MakeLibCallOptions CallOptions;
3396 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3397 SDValue Chain = SDValue();
3398 SDValue Result;
3399 std::tie(Result, Chain) =
3400 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3401 return Result;
3402}
3403
3404SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3405 SelectionDAG &DAG) const {
3406
3407 SDLoc DL(Op);
3408 EVT VT = Op.getValueType();
3409 SDValue Op0 = Op.getOperand(0);
3410 EVT Op0VT = Op0.getValueType();
3411
3412 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3413 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3414 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3415 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3416 }
3417 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3418 SDValue Lo, Hi;
3419 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3420 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3421 }
3422 return Op;
3423}
3424
3425SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3426 SelectionDAG &DAG) const {
3427
3428 SDLoc DL(Op);
3429 SDValue Op0 = Op.getOperand(0);
3430
3431 if (Op0.getValueType() == MVT::f16)
3432 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3433
3434 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3435 !Subtarget.hasBasicD()) {
3436 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3437 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3438 }
3439
3440 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3441 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3442 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3443}
3444
3445 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3446 SelectionDAG &DAG, unsigned Flags) {
3447 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3448}
3449
3450 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3451 SelectionDAG &DAG, unsigned Flags) {
3452 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3453 Flags);
3454}
3455
3456 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3457 SelectionDAG &DAG, unsigned Flags) {
3458 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3459 N->getOffset(), Flags);
3460}
3461
3462 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3463 SelectionDAG &DAG, unsigned Flags) {
3464 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3465}
3466
3467template <class NodeTy>
3468SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3469 CodeModel::Model M,
3470 bool IsLocal) const {
3471 SDLoc DL(N);
3472 EVT Ty = getPointerTy(DAG.getDataLayout());
3473 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3474 SDValue Load;
3475
3476 switch (M) {
3477 default:
3478 report_fatal_error("Unsupported code model");
3479
3480 case CodeModel::Large: {
3481 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3482
3483 // This is not actually used, but is necessary for successfully matching
3484 // the PseudoLA_*_LARGE nodes.
3485 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3486 if (IsLocal) {
3487 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3488 // eventually becomes the desired 5-insn code sequence.
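// (For reference, the large-model sequence is roughly pcalau12i + addi.d +
// lu32i.d + lu52i.d + add.d, forming the full 64-bit PC-relative address.)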
3489 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3490 Tmp, Addr),
3491 0);
3492 } else {
3493 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3494 // eventually becomes the desired 5-insn code sequence.
3495 Load = SDValue(
3496 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3497 0);
3498 }
3499 break;
3500 }
3501
3502 case CodeModel::Small:
3503 case CodeModel::Medium:
3504 if (IsLocal) {
3505 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3506 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3507 Load = SDValue(
3508 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3509 } else {
3510 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3511 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3512 Load =
3513 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3514 }
3515 }
3516
3517 if (!IsLocal) {
3518 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3519 MachineFunction &MF = DAG.getMachineFunction();
3520 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3521 MachinePointerInfo::getGOT(MF),
3522 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3523 MachineMemOperand::MOInvariant,
3524 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3525 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3526 }
3527
3528 return Load;
3529}
3530
3531SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3532 SelectionDAG &DAG) const {
3533 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3534 DAG.getTarget().getCodeModel());
3535}
3536
3537SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3538 SelectionDAG &DAG) const {
3539 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3540 DAG.getTarget().getCodeModel());
3541}
3542
3543SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3544 SelectionDAG &DAG) const {
3545 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3546 DAG.getTarget().getCodeModel());
3547}
3548
3549SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3550 SelectionDAG &DAG) const {
3551 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3552 assert(N->getOffset() == 0 && "unexpected offset in global node");
3553 auto CM = DAG.getTarget().getCodeModel();
3554 const GlobalValue *GV = N->getGlobal();
3555
3556 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3557 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3558 CM = *GCM;
3559 }
3560
3561 return getAddr(N, DAG, CM, GV->isDSOLocal());
3562}
3563
3564SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3565 SelectionDAG &DAG,
3566 unsigned Opc, bool UseGOT,
3567 bool Large) const {
3568 SDLoc DL(N);
3569 EVT Ty = getPointerTy(DAG.getDataLayout());
3570 MVT GRLenVT = Subtarget.getGRLenVT();
3571
3572 // This is not actually used, but is necessary for successfully matching the
3573 // PseudoLA_*_LARGE nodes.
3574 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3575 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3576
3577 // Only IE needs an extra argument for large code model.
3578 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3579 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3580 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3581
3582 // If it is LE for normal/medium code model, the add tp operation will occur
3583 // during the pseudo-instruction expansion.
3584 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3585 return Offset;
3586
3587 if (UseGOT) {
3588 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3589 MachineFunction &MF = DAG.getMachineFunction();
3590 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3591 MachinePointerInfo::getGOT(MF),
3592 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3593 MachineMemOperand::MOInvariant,
3594 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3595 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3596 }
3597
3598 // Add the thread pointer.
3599 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3600 DAG.getRegister(LoongArch::R2, GRLenVT));
3601}
3602
3603SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3604 SelectionDAG &DAG,
3605 unsigned Opc,
3606 bool Large) const {
3607 SDLoc DL(N);
3608 EVT Ty = getPointerTy(DAG.getDataLayout());
3609 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3610
3611 // This is not actually used, but is necessary for successfully matching the
3612 // PseudoLA_*_LARGE nodes.
3613 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3614
3615 // Use a PC-relative addressing mode to access the dynamic GOT address.
3616 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3617 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3618 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3619
3620 // Prepare argument list to generate call.
3621 ArgListTy Args;
3622 Args.emplace_back(Load, CallTy);
3623
3624 // Setup call to __tls_get_addr.
3625 TargetLowering::CallLoweringInfo CLI(DAG);
3626 CLI.setDebugLoc(DL)
3627 .setChain(DAG.getEntryNode())
3628 .setLibCallee(CallingConv::C, CallTy,
3629 DAG.getExternalSymbol("__tls_get_addr", Ty),
3630 std::move(Args));
3631
3632 return LowerCallTo(CLI).first;
3633}
3634
3635SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3636 SelectionDAG &DAG, unsigned Opc,
3637 bool Large) const {
3638 SDLoc DL(N);
3639 EVT Ty = getPointerTy(DAG.getDataLayout());
3640 const GlobalValue *GV = N->getGlobal();
3641
3642 // This is not actually used, but is necessary for successfully matching the
3643 // PseudoLA_*_LARGE nodes.
3644 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3645
3646 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3647 // This generates the pattern (PseudoLA_TLS_DESC{,_LARGE} sym).
3648 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3649 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3650 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3651}
3652
3653SDValue
3654LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3655 SelectionDAG &DAG) const {
3656 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3657 CallingConv::GHC)
3658 report_fatal_error("In GHC calling convention TLS is not supported");
3659
3660 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3661 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3662
3663 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3664 assert(N->getOffset() == 0 && "unexpected offset in global node");
3665
3666 if (DAG.getTarget().useEmulatedTLS())
3667 reportFatalUsageError("the emulated TLS is prohibited");
3668
3669 bool IsDesc = DAG.getTarget().useTLSDESC();
3670
3671 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3672 case TLSModel::GeneralDynamic:
3673 // In this model, application code calls the dynamic linker function
3674 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3675 // runtime.
3676 if (!IsDesc)
3677 return getDynamicTLSAddr(N, DAG,
3678 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3679 : LoongArch::PseudoLA_TLS_GD,
3680 Large);
3681 break;
3682 case TLSModel::LocalDynamic:
3683 // Same as GeneralDynamic, except for assembly modifiers and relocation
3684 // records.
3685 if (!IsDesc)
3686 return getDynamicTLSAddr(N, DAG,
3687 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3688 : LoongArch::PseudoLA_TLS_LD,
3689 Large);
3690 break;
3691 case TLSModel::InitialExec:
3692 // This model uses the GOT to resolve TLS offsets.
3693 return getStaticTLSAddr(N, DAG,
3694 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3695 : LoongArch::PseudoLA_TLS_IE,
3696 /*UseGOT=*/true, Large);
3697 case TLSModel::LocalExec:
3698 // This model is used when static linking as the TLS offsets are resolved
3699 // during program linking.
3700 //
3701 // This node doesn't need an extra argument for the large code model.
3702 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3703 /*UseGOT=*/false, Large);
3704 }
3705
3706 return getTLSDescAddr(N, DAG,
3707 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3708 : LoongArch::PseudoLA_TLS_DESC,
3709 Large);
3710}
3711
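// For example, checkIntrinsicImmArg<5>(Op, 2, DAG) below emits a diagnostic
// and returns UNDEF when operand 2 of the intrinsic does not fit in a 5-bit
// (unsigned, or signed if IsSigned) immediate; an empty SDValue means the
// argument is in range and normal lowering proceeds.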
3712template <unsigned N>
3713 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3714 SelectionDAG &DAG, bool IsSigned = false) {
3715 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3716 // Check the ImmArg.
3717 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3718 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3719 DAG.getContext()->emitError(Op->getOperationName(0) +
3720 ": argument out of range.");
3721 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3722 }
3723 return SDValue();
3724}
3725
3726SDValue
3727LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3728 SelectionDAG &DAG) const {
3729 switch (Op.getConstantOperandVal(0)) {
3730 default:
3731 return SDValue(); // Don't custom lower most intrinsics.
3732 case Intrinsic::thread_pointer: {
3733 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3734 return DAG.getRegister(LoongArch::R2, PtrVT);
3735 }
3736 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3737 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3738 case Intrinsic::loongarch_lsx_vreplvei_d:
3739 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3740 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3741 case Intrinsic::loongarch_lsx_vreplvei_w:
3742 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3743 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3744 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3745 case Intrinsic::loongarch_lasx_xvpickve_d:
3746 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3747 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3748 case Intrinsic::loongarch_lasx_xvinsve0_d:
3749 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3750 case Intrinsic::loongarch_lsx_vsat_b:
3751 case Intrinsic::loongarch_lsx_vsat_bu:
3752 case Intrinsic::loongarch_lsx_vrotri_b:
3753 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3754 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3755 case Intrinsic::loongarch_lsx_vsrlri_b:
3756 case Intrinsic::loongarch_lsx_vsrari_b:
3757 case Intrinsic::loongarch_lsx_vreplvei_h:
3758 case Intrinsic::loongarch_lasx_xvsat_b:
3759 case Intrinsic::loongarch_lasx_xvsat_bu:
3760 case Intrinsic::loongarch_lasx_xvrotri_b:
3761 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3762 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3763 case Intrinsic::loongarch_lasx_xvsrlri_b:
3764 case Intrinsic::loongarch_lasx_xvsrari_b:
3765 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3766 case Intrinsic::loongarch_lasx_xvpickve_w:
3767 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3768 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3769 case Intrinsic::loongarch_lasx_xvinsve0_w:
3770 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3771 case Intrinsic::loongarch_lsx_vsat_h:
3772 case Intrinsic::loongarch_lsx_vsat_hu:
3773 case Intrinsic::loongarch_lsx_vrotri_h:
3774 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3775 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3776 case Intrinsic::loongarch_lsx_vsrlri_h:
3777 case Intrinsic::loongarch_lsx_vsrari_h:
3778 case Intrinsic::loongarch_lsx_vreplvei_b:
3779 case Intrinsic::loongarch_lasx_xvsat_h:
3780 case Intrinsic::loongarch_lasx_xvsat_hu:
3781 case Intrinsic::loongarch_lasx_xvrotri_h:
3782 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3783 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3784 case Intrinsic::loongarch_lasx_xvsrlri_h:
3785 case Intrinsic::loongarch_lasx_xvsrari_h:
3786 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3787 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3788 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3789 case Intrinsic::loongarch_lsx_vsrani_b_h:
3790 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3791 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3792 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3793 case Intrinsic::loongarch_lsx_vssrani_b_h:
3794 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3795 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3796 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3797 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3798 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3799 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3800 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3801 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3802 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3803 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3804 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3805 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3806 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3807 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3808 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3809 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3810 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3811 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3812 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3813 case Intrinsic::loongarch_lsx_vsat_w:
3814 case Intrinsic::loongarch_lsx_vsat_wu:
3815 case Intrinsic::loongarch_lsx_vrotri_w:
3816 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3817 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3818 case Intrinsic::loongarch_lsx_vsrlri_w:
3819 case Intrinsic::loongarch_lsx_vsrari_w:
3820 case Intrinsic::loongarch_lsx_vslei_bu:
3821 case Intrinsic::loongarch_lsx_vslei_hu:
3822 case Intrinsic::loongarch_lsx_vslei_wu:
3823 case Intrinsic::loongarch_lsx_vslei_du:
3824 case Intrinsic::loongarch_lsx_vslti_bu:
3825 case Intrinsic::loongarch_lsx_vslti_hu:
3826 case Intrinsic::loongarch_lsx_vslti_wu:
3827 case Intrinsic::loongarch_lsx_vslti_du:
3828 case Intrinsic::loongarch_lsx_vbsll_v:
3829 case Intrinsic::loongarch_lsx_vbsrl_v:
3830 case Intrinsic::loongarch_lasx_xvsat_w:
3831 case Intrinsic::loongarch_lasx_xvsat_wu:
3832 case Intrinsic::loongarch_lasx_xvrotri_w:
3833 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3834 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3835 case Intrinsic::loongarch_lasx_xvsrlri_w:
3836 case Intrinsic::loongarch_lasx_xvsrari_w:
3837 case Intrinsic::loongarch_lasx_xvslei_bu:
3838 case Intrinsic::loongarch_lasx_xvslei_hu:
3839 case Intrinsic::loongarch_lasx_xvslei_wu:
3840 case Intrinsic::loongarch_lasx_xvslei_du:
3841 case Intrinsic::loongarch_lasx_xvslti_bu:
3842 case Intrinsic::loongarch_lasx_xvslti_hu:
3843 case Intrinsic::loongarch_lasx_xvslti_wu:
3844 case Intrinsic::loongarch_lasx_xvslti_du:
3845 case Intrinsic::loongarch_lasx_xvbsll_v:
3846 case Intrinsic::loongarch_lasx_xvbsrl_v:
3847 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3848 case Intrinsic::loongarch_lsx_vseqi_b:
3849 case Intrinsic::loongarch_lsx_vseqi_h:
3850 case Intrinsic::loongarch_lsx_vseqi_w:
3851 case Intrinsic::loongarch_lsx_vseqi_d:
3852 case Intrinsic::loongarch_lsx_vslei_b:
3853 case Intrinsic::loongarch_lsx_vslei_h:
3854 case Intrinsic::loongarch_lsx_vslei_w:
3855 case Intrinsic::loongarch_lsx_vslei_d:
3856 case Intrinsic::loongarch_lsx_vslti_b:
3857 case Intrinsic::loongarch_lsx_vslti_h:
3858 case Intrinsic::loongarch_lsx_vslti_w:
3859 case Intrinsic::loongarch_lsx_vslti_d:
3860 case Intrinsic::loongarch_lasx_xvseqi_b:
3861 case Intrinsic::loongarch_lasx_xvseqi_h:
3862 case Intrinsic::loongarch_lasx_xvseqi_w:
3863 case Intrinsic::loongarch_lasx_xvseqi_d:
3864 case Intrinsic::loongarch_lasx_xvslei_b:
3865 case Intrinsic::loongarch_lasx_xvslei_h:
3866 case Intrinsic::loongarch_lasx_xvslei_w:
3867 case Intrinsic::loongarch_lasx_xvslei_d:
3868 case Intrinsic::loongarch_lasx_xvslti_b:
3869 case Intrinsic::loongarch_lasx_xvslti_h:
3870 case Intrinsic::loongarch_lasx_xvslti_w:
3871 case Intrinsic::loongarch_lasx_xvslti_d:
3872 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3873 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3874 case Intrinsic::loongarch_lsx_vsrani_h_w:
3875 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3876 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3877 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3878 case Intrinsic::loongarch_lsx_vssrani_h_w:
3879 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3880 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3881 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3882 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3883 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3884 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3885 case Intrinsic::loongarch_lsx_vfrstpi_b:
3886 case Intrinsic::loongarch_lsx_vfrstpi_h:
3887 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3888 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3889 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3890 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3891 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3892 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3893 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3894 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3895 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3896 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3897 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3898 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3899 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3900 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3901 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3902 case Intrinsic::loongarch_lsx_vsat_d:
3903 case Intrinsic::loongarch_lsx_vsat_du:
3904 case Intrinsic::loongarch_lsx_vrotri_d:
3905 case Intrinsic::loongarch_lsx_vsrlri_d:
3906 case Intrinsic::loongarch_lsx_vsrari_d:
3907 case Intrinsic::loongarch_lasx_xvsat_d:
3908 case Intrinsic::loongarch_lasx_xvsat_du:
3909 case Intrinsic::loongarch_lasx_xvrotri_d:
3910 case Intrinsic::loongarch_lasx_xvsrlri_d:
3911 case Intrinsic::loongarch_lasx_xvsrari_d:
3912 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3913 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3914 case Intrinsic::loongarch_lsx_vsrani_w_d:
3915 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3916 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3917 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3918 case Intrinsic::loongarch_lsx_vssrani_w_d:
3919 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3920 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3921 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3922 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3923 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3924 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3925 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3926 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3927 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3928 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3929 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3930 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3931 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3932 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3933 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3934 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3935 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3936 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3937 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3938 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3939 case Intrinsic::loongarch_lsx_vsrani_d_q:
3940 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3941 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3942 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3943 case Intrinsic::loongarch_lsx_vssrani_d_q:
3944 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3945 case Intrinsic::loongarch_lsx_vssrani_du_q:
3946 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3947 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3948 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3949 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3950 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3951 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3952 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3953 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3954 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3955 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3956 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3957 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3958 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3959 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3960 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3961 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3962 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3963 case Intrinsic::loongarch_lsx_vnori_b:
3964 case Intrinsic::loongarch_lsx_vshuf4i_b:
3965 case Intrinsic::loongarch_lsx_vshuf4i_h:
3966 case Intrinsic::loongarch_lsx_vshuf4i_w:
3967 case Intrinsic::loongarch_lasx_xvnori_b:
3968 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3969 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3970 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3971 case Intrinsic::loongarch_lasx_xvpermi_d:
3972 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3973 case Intrinsic::loongarch_lsx_vshuf4i_d:
3974 case Intrinsic::loongarch_lsx_vpermi_w:
3975 case Intrinsic::loongarch_lsx_vbitseli_b:
3976 case Intrinsic::loongarch_lsx_vextrins_b:
3977 case Intrinsic::loongarch_lsx_vextrins_h:
3978 case Intrinsic::loongarch_lsx_vextrins_w:
3979 case Intrinsic::loongarch_lsx_vextrins_d:
3980 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3981 case Intrinsic::loongarch_lasx_xvpermi_w:
3982 case Intrinsic::loongarch_lasx_xvpermi_q:
3983 case Intrinsic::loongarch_lasx_xvbitseli_b:
3984 case Intrinsic::loongarch_lasx_xvextrins_b:
3985 case Intrinsic::loongarch_lasx_xvextrins_h:
3986 case Intrinsic::loongarch_lasx_xvextrins_w:
3987 case Intrinsic::loongarch_lasx_xvextrins_d:
3988 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3989 case Intrinsic::loongarch_lsx_vrepli_b:
3990 case Intrinsic::loongarch_lsx_vrepli_h:
3991 case Intrinsic::loongarch_lsx_vrepli_w:
3992 case Intrinsic::loongarch_lsx_vrepli_d:
3993 case Intrinsic::loongarch_lasx_xvrepli_b:
3994 case Intrinsic::loongarch_lasx_xvrepli_h:
3995 case Intrinsic::loongarch_lasx_xvrepli_w:
3996 case Intrinsic::loongarch_lasx_xvrepli_d:
3997 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3998 case Intrinsic::loongarch_lsx_vldi:
3999 case Intrinsic::loongarch_lasx_xvldi:
4000 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4001 }
4002}
4003
4004 // Helper function that emits an error message for intrinsics with a chain and
4005 // returns the merge values of an UNDEF and the chain.
4006 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4007 StringRef ErrorMsg,
4008 SelectionDAG &DAG) {
4009 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4010 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4011 SDLoc(Op));
4012}
4013
4014SDValue
4015LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4016 SelectionDAG &DAG) const {
4017 SDLoc DL(Op);
4018 MVT GRLenVT = Subtarget.getGRLenVT();
4019 EVT VT = Op.getValueType();
4020 SDValue Chain = Op.getOperand(0);
4021 const StringRef ErrorMsgOOR = "argument out of range";
4022 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4023 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4024
4025 switch (Op.getConstantOperandVal(1)) {
4026 default:
4027 return Op;
4028 case Intrinsic::loongarch_crc_w_b_w:
4029 case Intrinsic::loongarch_crc_w_h_w:
4030 case Intrinsic::loongarch_crc_w_w_w:
4031 case Intrinsic::loongarch_crc_w_d_w:
4032 case Intrinsic::loongarch_crcc_w_b_w:
4033 case Intrinsic::loongarch_crcc_w_h_w:
4034 case Intrinsic::loongarch_crcc_w_w_w:
4035 case Intrinsic::loongarch_crcc_w_d_w:
4036 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4037 case Intrinsic::loongarch_csrrd_w:
4038 case Intrinsic::loongarch_csrrd_d: {
4039 unsigned Imm = Op.getConstantOperandVal(2);
4040 return !isUInt<14>(Imm)
4041 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4042 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4043 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4044 }
4045 case Intrinsic::loongarch_csrwr_w:
4046 case Intrinsic::loongarch_csrwr_d: {
4047 unsigned Imm = Op.getConstantOperandVal(3);
4048 return !isUInt<14>(Imm)
4049 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4050 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4051 {Chain, Op.getOperand(2),
4052 DAG.getConstant(Imm, DL, GRLenVT)});
4053 }
4054 case Intrinsic::loongarch_csrxchg_w:
4055 case Intrinsic::loongarch_csrxchg_d: {
4056 unsigned Imm = Op.getConstantOperandVal(4);
4057 return !isUInt<14>(Imm)
4058 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4059 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4060 {Chain, Op.getOperand(2), Op.getOperand(3),
4061 DAG.getConstant(Imm, DL, GRLenVT)});
4062 }
4063 case Intrinsic::loongarch_iocsrrd_d: {
4064 return DAG.getNode(
4065 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4066 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4067 }
4068#define IOCSRRD_CASE(NAME, NODE) \
4069 case Intrinsic::loongarch_##NAME: { \
4070 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4071 {Chain, Op.getOperand(2)}); \
4072 }
4073 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4074 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4075 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4076#undef IOCSRRD_CASE
4077 case Intrinsic::loongarch_cpucfg: {
4078 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4079 {Chain, Op.getOperand(2)});
4080 }
4081 case Intrinsic::loongarch_lddir_d: {
4082 unsigned Imm = Op.getConstantOperandVal(3);
4083 return !isUInt<8>(Imm)
4084 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4085 : Op;
4086 }
4087 case Intrinsic::loongarch_movfcsr2gr: {
4088 if (!Subtarget.hasBasicF())
4089 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4090 unsigned Imm = Op.getConstantOperandVal(2);
4091 return !isUInt<2>(Imm)
4092 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4093 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4094 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4095 }
4096 case Intrinsic::loongarch_lsx_vld:
4097 case Intrinsic::loongarch_lsx_vldrepl_b:
4098 case Intrinsic::loongarch_lasx_xvld:
4099 case Intrinsic::loongarch_lasx_xvldrepl_b:
4100 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4101 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4102 : SDValue();
4103 case Intrinsic::loongarch_lsx_vldrepl_h:
4104 case Intrinsic::loongarch_lasx_xvldrepl_h:
4105 return !isShiftedInt<11, 1>(
4106 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4107 ? emitIntrinsicWithChainErrorMessage(
4108 Op, "argument out of range or not a multiple of 2", DAG)
4109 : SDValue();
4110 case Intrinsic::loongarch_lsx_vldrepl_w:
4111 case Intrinsic::loongarch_lasx_xvldrepl_w:
4112 return !isShiftedInt<10, 2>(
4113 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4114 ? emitIntrinsicWithChainErrorMessage(
4115 Op, "argument out of range or not a multiple of 4", DAG)
4116 : SDValue();
4117 case Intrinsic::loongarch_lsx_vldrepl_d:
4118 case Intrinsic::loongarch_lasx_xvldrepl_d:
4119 return !isShiftedInt<9, 3>(
4120 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4121 ? emitIntrinsicWithChainErrorMessage(
4122 Op, "argument out of range or not a multiple of 8", DAG)
4123 : SDValue();
4124 }
4125}
4126
4127 // Helper function that emits an error message for intrinsics with a void
4128 // return value and returns the chain.
4129 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4130 SelectionDAG &DAG) {
4131
4132 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4133 return Op.getOperand(0);
4134}
4135
4136SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4137 SelectionDAG &DAG) const {
4138 SDLoc DL(Op);
4139 MVT GRLenVT = Subtarget.getGRLenVT();
4140 SDValue Chain = Op.getOperand(0);
4141 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4142 SDValue Op2 = Op.getOperand(2);
4143 const StringRef ErrorMsgOOR = "argument out of range";
4144 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4145 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4146 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4147
4148 switch (IntrinsicEnum) {
4149 default:
4150 // TODO: Add more Intrinsics.
4151 return SDValue();
4152 case Intrinsic::loongarch_cacop_d:
4153 case Intrinsic::loongarch_cacop_w: {
4154 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4155 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4156 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4157 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4158 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4159 unsigned Imm1 = Op2->getAsZExtVal();
4160 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4161 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4162 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4163 return Op;
4164 }
4165 case Intrinsic::loongarch_dbar: {
4166 unsigned Imm = Op2->getAsZExtVal();
4167 return !isUInt<15>(Imm)
4168 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4169 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4170 DAG.getConstant(Imm, DL, GRLenVT));
4171 }
4172 case Intrinsic::loongarch_ibar: {
4173 unsigned Imm = Op2->getAsZExtVal();
4174 return !isUInt<15>(Imm)
4175 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4176 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4177 DAG.getConstant(Imm, DL, GRLenVT));
4178 }
4179 case Intrinsic::loongarch_break: {
4180 unsigned Imm = Op2->getAsZExtVal();
4181 return !isUInt<15>(Imm)
4182 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4183 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4184 DAG.getConstant(Imm, DL, GRLenVT));
4185 }
4186 case Intrinsic::loongarch_movgr2fcsr: {
4187 if (!Subtarget.hasBasicF())
4188 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4189 unsigned Imm = Op2->getAsZExtVal();
4190 return !isUInt<2>(Imm)
4191 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4192 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4193 DAG.getConstant(Imm, DL, GRLenVT),
4194 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4195 Op.getOperand(3)));
4196 }
4197 case Intrinsic::loongarch_syscall: {
4198 unsigned Imm = Op2->getAsZExtVal();
4199 return !isUInt<15>(Imm)
4200 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4201 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4202 DAG.getConstant(Imm, DL, GRLenVT));
4203 }
4204#define IOCSRWR_CASE(NAME, NODE) \
4205 case Intrinsic::loongarch_##NAME: { \
4206 SDValue Op3 = Op.getOperand(3); \
4207 return Subtarget.is64Bit() \
4208 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4209 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4210 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4211 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4212 Op3); \
4213 }
4214 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4215 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4216 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4217#undef IOCSRWR_CASE
4218 case Intrinsic::loongarch_iocsrwr_d: {
4219 return !Subtarget.is64Bit()
4220 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4221 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4222 Op2,
4223 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4224 Op.getOperand(3)));
4225 }
4226#define ASRT_LE_GT_CASE(NAME) \
4227 case Intrinsic::loongarch_##NAME: { \
4228 return !Subtarget.is64Bit() \
4229 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4230 : Op; \
4231 }
4232 ASRT_LE_GT_CASE(asrtle_d)
4233 ASRT_LE_GT_CASE(asrtgt_d)
4234#undef ASRT_LE_GT_CASE
4235 case Intrinsic::loongarch_ldpte_d: {
4236 unsigned Imm = Op.getConstantOperandVal(3);
4237 return !Subtarget.is64Bit()
4238 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4239 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4240 : Op;
4241 }
4242 case Intrinsic::loongarch_lsx_vst:
4243 case Intrinsic::loongarch_lasx_xvst:
4244 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4245 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4246 : SDValue();
4247 case Intrinsic::loongarch_lasx_xvstelm_b:
4248 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4249 !isUInt<5>(Op.getConstantOperandVal(5)))
4250 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4251 : SDValue();
4252 case Intrinsic::loongarch_lsx_vstelm_b:
4253 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4254 !isUInt<4>(Op.getConstantOperandVal(5)))
4255 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4256 : SDValue();
4257 case Intrinsic::loongarch_lasx_xvstelm_h:
4258 return (!isShiftedInt<8, 1>(
4259 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4260 !isUInt<4>(Op.getConstantOperandVal(5)))
4261 ? emitIntrinsicErrorMessage(
4262 Op, "argument out of range or not a multiple of 2", DAG)
4263 : SDValue();
4264 case Intrinsic::loongarch_lsx_vstelm_h:
4265 return (!isShiftedInt<8, 1>(
4266 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4267 !isUInt<3>(Op.getConstantOperandVal(5)))
4268 ? emitIntrinsicErrorMessage(
4269 Op, "argument out of range or not a multiple of 2", DAG)
4270 : SDValue();
4271 case Intrinsic::loongarch_lasx_xvstelm_w:
4272 return (!isShiftedInt<8, 2>(
4273 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4274 !isUInt<3>(Op.getConstantOperandVal(5)))
4275 ? emitIntrinsicErrorMessage(
4276 Op, "argument out of range or not a multiple of 4", DAG)
4277 : SDValue();
4278 case Intrinsic::loongarch_lsx_vstelm_w:
4279 return (!isShiftedInt<8, 2>(
4280 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4281 !isUInt<2>(Op.getConstantOperandVal(5)))
4282 ? emitIntrinsicErrorMessage(
4283 Op, "argument out of range or not a multiple of 4", DAG)
4284 : SDValue();
4285 case Intrinsic::loongarch_lasx_xvstelm_d:
4286 return (!isShiftedInt<8, 3>(
4287 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4288 !isUInt<2>(Op.getConstantOperandVal(5)))
4289 ? emitIntrinsicErrorMessage(
4290 Op, "argument out of range or not a multiple of 8", DAG)
4291 : SDValue();
4292 case Intrinsic::loongarch_lsx_vstelm_d:
4293 return (!isShiftedInt<8, 3>(
4294 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4295 !isUInt<1>(Op.getConstantOperandVal(5)))
4296 ? emitIntrinsicErrorMessage(
4297 Op, "argument out of range or not a multiple of 8", DAG)
4298 : SDValue();
4299 }
4300}
4301
4302SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4303 SelectionDAG &DAG) const {
4304 SDLoc DL(Op);
4305 SDValue Lo = Op.getOperand(0);
4306 SDValue Hi = Op.getOperand(1);
4307 SDValue Shamt = Op.getOperand(2);
4308 EVT VT = Lo.getValueType();
4309
4310 // if Shamt-GRLen < 0: // Shamt < GRLen
4311 // Lo = Lo << Shamt
4312 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4313 // else:
4314 // Lo = 0
4315 // Hi = Lo << (Shamt-GRLen)
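// For example, with GRLen=32: Shamt=4 gives Lo = Lo << 4 and
// Hi = (Hi << 4) | (Lo >>u 28), while Shamt=40 gives Lo = 0 and Hi = Lo << 8.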
4316
4317 SDValue Zero = DAG.getConstant(0, DL, VT);
4318 SDValue One = DAG.getConstant(1, DL, VT);
4319 SDValue MinusGRLen =
4320 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4321 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4322 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4323 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4324
4325 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4326 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4327 SDValue ShiftRightLo =
4328 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4329 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4330 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4331 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4332
4333 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4334
4335 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4336 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4337
4338 SDValue Parts[2] = {Lo, Hi};
4339 return DAG.getMergeValues(Parts, DL);
4340}
4341
4342SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4343 SelectionDAG &DAG,
4344 bool IsSRA) const {
4345 SDLoc DL(Op);
4346 SDValue Lo = Op.getOperand(0);
4347 SDValue Hi = Op.getOperand(1);
4348 SDValue Shamt = Op.getOperand(2);
4349 EVT VT = Lo.getValueType();
4350
4351 // SRA expansion:
4352 // if Shamt-GRLen < 0: // Shamt < GRLen
4353 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4354 // Hi = Hi >>s Shamt
4355 // else:
4356 // Lo = Hi >>s (Shamt-GRLen);
4357 // Hi = Hi >>s (GRLen-1)
4358 //
4359 // SRL expansion:
4360 // if Shamt-GRLen < 0: // Shamt < GRLen
4361 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4362 // Hi = Hi >>u Shamt
4363 // else:
4364 // Lo = Hi >>u (Shamt-GRLen);
4365 // Hi = 0;
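// For example, with GRLen=32 and Shamt=40: SRA produces Lo = Hi >>s 8 and
// Hi = Hi >>s 31 (all sign bits), while SRL produces Lo = Hi >>u 8 and Hi = 0.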
4366
4367 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4368
4369 SDValue Zero = DAG.getConstant(0, DL, VT);
4370 SDValue One = DAG.getConstant(1, DL, VT);
4371 SDValue MinusGRLen =
4372 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4373 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4374 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4375 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4376
4377 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4378 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4379 SDValue ShiftLeftHi =
4380 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4381 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4382 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4383 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4384 SDValue HiFalse =
4385 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4386
4387 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4388
4389 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4390 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4391
4392 SDValue Parts[2] = {Lo, Hi};
4393 return DAG.getMergeValues(Parts, DL);
4394}
4395
4396// Returns the opcode of the target-specific SDNode that implements the 32-bit
4397// form of the given Opcode.
4398 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4399 switch (Opcode) {
4400 default:
4401 llvm_unreachable("Unexpected opcode");
4402 case ISD::SDIV:
4403 return LoongArchISD::DIV_W;
4404 case ISD::UDIV:
4405 return LoongArchISD::DIV_WU;
4406 case ISD::SREM:
4407 return LoongArchISD::MOD_W;
4408 case ISD::UREM:
4409 return LoongArchISD::MOD_WU;
4410 case ISD::SHL:
4411 return LoongArchISD::SLL_W;
4412 case ISD::SRA:
4413 return LoongArchISD::SRA_W;
4414 case ISD::SRL:
4415 return LoongArchISD::SRL_W;
4416 case ISD::ROTL:
4417 case ISD::ROTR:
4418 return LoongArchISD::ROTR_W;
4419 case ISD::CTTZ:
4420 return LoongArchISD::CTZ_W;
4421 case ISD::CTLZ:
4422 return LoongArchISD::CLZ_W;
4423 }
4424}
4425
4426// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4427// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4428 // otherwise be promoted to i64, making it difficult to select the
4429 // SLL_W/.../*_W nodes later on, because the fact that the operation was
4430 // originally of type i8/i16/i32 is lost.
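// For example, an i32 shl on LA64 becomes
// (trunc i32 (SLL_W (any_ext i64 x), (any_ext i64 y))).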
4431 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4432 unsigned ExtOpc = ISD::ANY_EXTEND) {
4433 SDLoc DL(N);
4434 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4435 SDValue NewOp0, NewRes;
4436
4437 switch (NumOp) {
4438 default:
4439 llvm_unreachable("Unexpected NumOp");
4440 case 1: {
4441 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4442 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4443 break;
4444 }
4445 case 2: {
4446 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4447 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4448 if (N->getOpcode() == ISD::ROTL) {
4449 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4450 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4451 }
4452 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4453 break;
4454 }
4455 // TODO: Handle more NumOp.
4456 }
4457
4458 // ReplaceNodeResults requires we maintain the same type for the return
4459 // value.
4460 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4461}
4462
4463 // Converts the given 32-bit operation to an i64 operation with sign-extension
4464 // semantics, reducing the number of sign-extension instructions needed.
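// For example, an i32 add becomes
// (trunc i32 (sext_inreg (add i64 (any_ext x), (any_ext y)), i32)).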
4465 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4466 SDLoc DL(N);
4467 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4468 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4469 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4470 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4471 DAG.getValueType(MVT::i32));
4472 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4473}
4474
4475 // Helper function that emits an error message for intrinsics with or without
4476 // a chain, replacing the results with an UNDEF and (optionally) the chain.
4477 static void emitErrorAndReplaceIntrinsicResults(
4478 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4479 StringRef ErrorMsg, bool WithChain = true) {
4480 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4481 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4482 if (!WithChain)
4483 return;
4484 Results.push_back(N->getOperand(0));
4485}
4486
4487template <unsigned N>
4488static void
4489 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4490 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4491 unsigned ResOp) {
4492 const StringRef ErrorMsgOOR = "argument out of range";
4493 unsigned Imm = Node->getConstantOperandVal(2);
4494 if (!isUInt<N>(Imm)) {
4495 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4496 /*WithChain=*/false);
4497 return;
4498 }
4499 SDLoc DL(Node);
4500 SDValue Vec = Node->getOperand(1);
4501
4502 SDValue PickElt =
4503 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4504 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4505 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4506 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4507 PickElt.getValue(0)));
4508}
4509
4510 static void replaceVecCondBranchResults(SDNode *N,
4511 SmallVectorImpl<SDValue> &Results,
4512 SelectionDAG &DAG,
4513 const LoongArchSubtarget &Subtarget,
4514 unsigned ResOp) {
4515 SDLoc DL(N);
4516 SDValue Vec = N->getOperand(1);
4517
4518 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4519 Results.push_back(
4520 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4521}
4522
4523static void
4524 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4525 SelectionDAG &DAG,
4526 const LoongArchSubtarget &Subtarget) {
4527 switch (N->getConstantOperandVal(0)) {
4528 default:
4529 llvm_unreachable("Unexpected Intrinsic.");
4530 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4531 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4532 LoongArchISD::VPICK_SEXT_ELT);
4533 break;
4534 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4535 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4536 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4537 LoongArchISD::VPICK_SEXT_ELT);
4538 break;
4539 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4540 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4541 LoongArchISD::VPICK_SEXT_ELT);
4542 break;
4543 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4544 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4545 LoongArchISD::VPICK_ZEXT_ELT);
4546 break;
4547 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4548 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4549 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4550 LoongArchISD::VPICK_ZEXT_ELT);
4551 break;
4552 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4553 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4554 LoongArchISD::VPICK_ZEXT_ELT);
4555 break;
4556 case Intrinsic::loongarch_lsx_bz_b:
4557 case Intrinsic::loongarch_lsx_bz_h:
4558 case Intrinsic::loongarch_lsx_bz_w:
4559 case Intrinsic::loongarch_lsx_bz_d:
4560 case Intrinsic::loongarch_lasx_xbz_b:
4561 case Intrinsic::loongarch_lasx_xbz_h:
4562 case Intrinsic::loongarch_lasx_xbz_w:
4563 case Intrinsic::loongarch_lasx_xbz_d:
4564 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4565 LoongArchISD::VANY_ZERO);
4566 break;
4567 case Intrinsic::loongarch_lsx_bz_v:
4568 case Intrinsic::loongarch_lasx_xbz_v:
4569 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4570 LoongArchISD::VALL_ZERO);
4571 break;
4572 case Intrinsic::loongarch_lsx_bnz_b:
4573 case Intrinsic::loongarch_lsx_bnz_h:
4574 case Intrinsic::loongarch_lsx_bnz_w:
4575 case Intrinsic::loongarch_lsx_bnz_d:
4576 case Intrinsic::loongarch_lasx_xbnz_b:
4577 case Intrinsic::loongarch_lasx_xbnz_h:
4578 case Intrinsic::loongarch_lasx_xbnz_w:
4579 case Intrinsic::loongarch_lasx_xbnz_d:
4580 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4581 LoongArchISD::VALL_NONZERO);
4582 break;
4583 case Intrinsic::loongarch_lsx_bnz_v:
4584 case Intrinsic::loongarch_lasx_xbnz_v:
4585 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4586 LoongArchISD::VANY_NONZERO);
4587 break;
4588 }
4589}
4590
4591 static void replaceCMP_XCHG_128Results(SDNode *N,
4592 SmallVectorImpl<SDValue> &Results,
4593 SelectionDAG &DAG) {
4594 assert(N->getValueType(0) == MVT::i128 &&
4595 "AtomicCmpSwap on types less than 128 should be legal");
4596 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4597
4598 unsigned Opcode;
4599 switch (MemOp->getMergedOrdering()) {
4600 case AtomicOrdering::Acquire:
4601 case AtomicOrdering::AcquireRelease:
4602 case AtomicOrdering::SequentiallyConsistent:
4603 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4604 break;
4605 case AtomicOrdering::Monotonic:
4606 case AtomicOrdering::Release:
4607 Opcode = LoongArch::PseudoCmpXchg128;
4608 break;
4609 default:
4610 llvm_unreachable("Unexpected ordering!");
4611 }
4612
4613 SDLoc DL(N);
4614 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4615 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4616 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4617 NewVal.first, NewVal.second, N->getOperand(0)};
4618
4619 SDNode *CmpSwap = DAG.getMachineNode(
4620 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4621 Ops);
4622 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4623 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4624 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4625 Results.push_back(SDValue(CmpSwap, 3));
4626}
4627
4628 void LoongArchTargetLowering::ReplaceNodeResults(
4629 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4630 SDLoc DL(N);
4631 EVT VT = N->getValueType(0);
4632 switch (N->getOpcode()) {
4633 default:
4634 llvm_unreachable("Don't know how to legalize this operation");
4635 case ISD::ADD:
4636 case ISD::SUB:
4637 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4638 "Unexpected custom legalisation");
4639 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4640 break;
4641 case ISD::SDIV:
4642 case ISD::UDIV:
4643 case ISD::SREM:
4644 case ISD::UREM:
4645 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4646 "Unexpected custom legalisation");
4647 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4648 Subtarget.hasDiv32() && VT == MVT::i32
4649 ? ISD::ANY_EXTEND
4650 : ISD::SIGN_EXTEND));
4651 break;
4652 case ISD::SHL:
4653 case ISD::SRA:
4654 case ISD::SRL:
4655 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4656 "Unexpected custom legalisation");
4657 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4658 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4659 break;
4660 }
4661 break;
4662 case ISD::ROTL:
4663 case ISD::ROTR:
4664 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4665 "Unexpected custom legalisation");
4666 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4667 break;
4668 case ISD::FP_TO_SINT: {
4669 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4670 "Unexpected custom legalisation");
4671 SDValue Src = N->getOperand(0);
4672 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4673 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4674 TargetLowering::TypeSoftenFloat) {
4675 if (!isTypeLegal(Src.getValueType()))
4676 return;
4677 if (Src.getValueType() == MVT::f16)
4678 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4679 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4680 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4681 return;
4682 }
4683 // If the FP type needs to be softened, emit a library call using the 'si'
4684 // version. If we left it to default legalization we'd end up with 'di'.
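// For example, for f32 -> i32 this emits a call to __fixsfsi rather than
// __fixsfdi followed by a truncate.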
4685 RTLIB::Libcall LC;
4686 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4687 MakeLibCallOptions CallOptions;
4688 EVT OpVT = Src.getValueType();
4689 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4690 SDValue Chain = SDValue();
4691 SDValue Result;
4692 std::tie(Result, Chain) =
4693 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4694 Results.push_back(Result);
4695 break;
4696 }
4697 case ISD::BITCAST: {
4698 SDValue Src = N->getOperand(0);
4699 EVT SrcVT = Src.getValueType();
4700 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4701 Subtarget.hasBasicF()) {
4702 SDValue Dst =
4703 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4704 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4705 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4706 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4707 DAG.getVTList(MVT::i32, MVT::i32), Src);
4708 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4709 NewReg.getValue(0), NewReg.getValue(1));
4710 Results.push_back(RetReg);
4711 }
4712 break;
4713 }
4714 case ISD::FP_TO_UINT: {
4715 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4716 "Unexpected custom legalisation");
4717 auto &TLI = DAG.getTargetLoweringInfo();
4718 SDValue Tmp1, Tmp2;
4719 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4720 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4721 break;
4722 }
4723 case ISD::BSWAP: {
4724 SDValue Src = N->getOperand(0);
4725 assert((VT == MVT::i16 || VT == MVT::i32) &&
4726 "Unexpected custom legalization");
4727 MVT GRLenVT = Subtarget.getGRLenVT();
4728 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4729 SDValue Tmp;
4730 switch (VT.getSizeInBits()) {
4731 default:
4732 llvm_unreachable("Unexpected operand width");
4733 case 16:
4734 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4735 break;
4736 case 32:
4737 // Only LA64 will get here due to the size mismatch between VT and GRLenVT;
4738 // the LA32 lowering is defined directly in LoongArchInstrInfo.
4739 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4740 break;
4741 }
4742 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4743 break;
4744 }
4745 case ISD::BITREVERSE: {
4746 SDValue Src = N->getOperand(0);
4747 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4748 "Unexpected custom legalization");
4749 MVT GRLenVT = Subtarget.getGRLenVT();
4750 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4751 SDValue Tmp;
4752 switch (VT.getSizeInBits()) {
4753 default:
4754 llvm_unreachable("Unexpected operand width");
4755 case 8:
4756 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4757 break;
4758 case 32:
4759 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4760 break;
4761 }
4762 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4763 break;
4764 }
4765 case ISD::CTLZ:
4766 case ISD::CTTZ: {
4767 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4768 "Unexpected custom legalisation");
4769 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4770 break;
4771 }
4772 case ISD::INTRINSIC_W_CHAIN: {
4773 SDValue Chain = N->getOperand(0);
4774 SDValue Op2 = N->getOperand(2);
4775 MVT GRLenVT = Subtarget.getGRLenVT();
4776 const StringRef ErrorMsgOOR = "argument out of range";
4777 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4778 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4779
4780 switch (N->getConstantOperandVal(1)) {
4781 default:
4782 llvm_unreachable("Unexpected Intrinsic.");
4783 case Intrinsic::loongarch_movfcsr2gr: {
4784 if (!Subtarget.hasBasicF()) {
4785 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4786 return;
4787 }
4788 unsigned Imm = Op2->getAsZExtVal();
4789 if (!isUInt<2>(Imm)) {
4790 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4791 return;
4792 }
4793 SDValue MOVFCSR2GRResults = DAG.getNode(
4794 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4795 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4796 Results.push_back(
4797 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4798 Results.push_back(MOVFCSR2GRResults.getValue(1));
4799 break;
4800 }
4801#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4802 case Intrinsic::loongarch_##NAME: { \
4803 SDValue NODE = DAG.getNode( \
4804 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4805 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4806 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4807 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4808 Results.push_back(NODE.getValue(1)); \
4809 break; \
4810 }
4811 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4812 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4813 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4814 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4815 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4816 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4817#undef CRC_CASE_EXT_BINARYOP
4818
4819#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4820 case Intrinsic::loongarch_##NAME: { \
4821 SDValue NODE = DAG.getNode( \
4822 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4823 {Chain, Op2, \
4824 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4825 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4826 Results.push_back(NODE.getValue(1)); \
4827 break; \
4828 }
4829 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4830 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4831#undef CRC_CASE_EXT_UNARYOP
4832#define CSR_CASE(ID) \
4833 case Intrinsic::loongarch_##ID: { \
4834 if (!Subtarget.is64Bit()) \
4835 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4836 break; \
4837 }
4838 CSR_CASE(csrrd_d);
4839 CSR_CASE(csrwr_d);
4840 CSR_CASE(csrxchg_d);
4841 CSR_CASE(iocsrrd_d);
4842#undef CSR_CASE
4843 case Intrinsic::loongarch_csrrd_w: {
4844 unsigned Imm = Op2->getAsZExtVal();
4845 if (!isUInt<14>(Imm)) {
4846 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4847 return;
4848 }
4849 SDValue CSRRDResults =
4850 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4851 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4852 Results.push_back(
4853 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4854 Results.push_back(CSRRDResults.getValue(1));
4855 break;
4856 }
4857 case Intrinsic::loongarch_csrwr_w: {
4858 unsigned Imm = N->getConstantOperandVal(3);
4859 if (!isUInt<14>(Imm)) {
4860 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4861 return;
4862 }
4863 SDValue CSRWRResults =
4864 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4865 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4866 DAG.getConstant(Imm, DL, GRLenVT)});
4867 Results.push_back(
4868 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4869 Results.push_back(CSRWRResults.getValue(1));
4870 break;
4871 }
4872 case Intrinsic::loongarch_csrxchg_w: {
4873 unsigned Imm = N->getConstantOperandVal(4);
4874 if (!isUInt<14>(Imm)) {
4875 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4876 return;
4877 }
4878 SDValue CSRXCHGResults = DAG.getNode(
4879 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4880 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4881 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4882 DAG.getConstant(Imm, DL, GRLenVT)});
4883 Results.push_back(
4884 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4885 Results.push_back(CSRXCHGResults.getValue(1));
4886 break;
4887 }
4888#define IOCSRRD_CASE(NAME, NODE) \
4889 case Intrinsic::loongarch_##NAME: { \
4890 SDValue IOCSRRDResults = \
4891 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4892 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4893 Results.push_back( \
4894 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4895 Results.push_back(IOCSRRDResults.getValue(1)); \
4896 break; \
4897 }
4898 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4899 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4900 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4901#undef IOCSRRD_CASE
4902 case Intrinsic::loongarch_cpucfg: {
4903 SDValue CPUCFGResults =
4904 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4905 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4906 Results.push_back(
4907 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4908 Results.push_back(CPUCFGResults.getValue(1));
4909 break;
4910 }
4911 case Intrinsic::loongarch_lddir_d: {
4912 if (!Subtarget.is64Bit()) {
4913 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4914 return;
4915 }
4916 break;
4917 }
4918 }
4919 break;
4920 }
4921 case ISD::READ_REGISTER: {
4922 if (Subtarget.is64Bit())
4923 DAG.getContext()->emitError(
4924 "On LA64, only 64-bit registers can be read.");
4925 else
4926 DAG.getContext()->emitError(
4927 "On LA32, only 32-bit registers can be read.");
4928 Results.push_back(DAG.getUNDEF(VT));
4929 Results.push_back(N->getOperand(0));
4930 break;
4931 }
4932 case ISD::INTRINSIC_WO_CHAIN: {
4933 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4934 break;
4935 }
4936 case ISD::LROUND: {
4937 SDValue Op0 = N->getOperand(0);
4938 EVT OpVT = Op0.getValueType();
4939 RTLIB::Libcall LC =
4940 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4941 MakeLibCallOptions CallOptions;
4942 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4943 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4944 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4945 Results.push_back(Result);
4946 break;
4947 }
4948 case ISD::ATOMIC_CMP_SWAP: {
4950 break;
4951 }
4952 case ISD::TRUNCATE: {
4953 MVT VT = N->getSimpleValueType(0);
4954 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4955 return;
4956
4957 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4958 SDValue In = N->getOperand(0);
4959 EVT InVT = In.getValueType();
4960 EVT InEltVT = InVT.getVectorElementType();
4961 EVT EltVT = VT.getVectorElementType();
4962 unsigned MinElts = VT.getVectorNumElements();
4963 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4964 unsigned InBits = InVT.getSizeInBits();
4965
4966 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4967 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4968 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4969 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4970 for (unsigned I = 0; I < MinElts; ++I)
4971 TruncMask[I] = Scale * I;
4972
4973 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4974 MVT SVT = In.getSimpleValueType().getScalarType();
4975 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4976 SDValue WidenIn =
4977 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4978 DAG.getVectorIdxConstant(0, DL));
4979 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4980 "Illegal vector type in truncation");
4981 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4982 Results.push_back(
4983 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4984 return;
4985 }
4986 }
4987
4988 break;
4989 }
4990 }
4991}
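// A minimal illustration of the vector-truncate widening handled in the
// ISD::TRUNCATE case above (types chosen only for the example, assuming a
// little-endian lane layout):
//   v2i32 (trunc v2i64:In)
//     InBits = 128, WidenVT = v4i32, Scale = 64 / 32 = 2,
//     TruncMask = {0, 2, -1, -1}
//   => In is (sub)vector-inserted into a 128-bit value, bitcast to v4i32 and
//      shuffled with {0, 2, -1, -1}, so the low halves of the two i64 lanes
//      end up in lanes 0 and 1 of the widened result.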
4992
4995 const LoongArchSubtarget &Subtarget) {
4996 if (DCI.isBeforeLegalizeOps())
4997 return SDValue();
4998
4999 SDValue FirstOperand = N->getOperand(0);
5000 SDValue SecondOperand = N->getOperand(1);
5001 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5002 EVT ValTy = N->getValueType(0);
5003 SDLoc DL(N);
5004 uint64_t lsb, msb;
5005 unsigned SMIdx, SMLen;
5006 ConstantSDNode *CN;
5007 SDValue NewOperand;
5008 MVT GRLenVT = Subtarget.getGRLenVT();
5009
5010 // BSTRPICK requires the 32S feature.
5011 if (!Subtarget.has32S())
5012 return SDValue();
5013
5014 // Op's second operand must be a shifted mask.
5015 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5016 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5017 return SDValue();
5018
5019 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5020 // Pattern match BSTRPICK.
5021    // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
5022 // => BSTRPICK $dst, $src, msb, lsb
5023 // where msb = lsb + len - 1
5024
5025 // The second operand of the shift must be an immediate.
5026 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5027 return SDValue();
5028
5029 lsb = CN->getZExtValue();
5030
5031 // Return if the shifted mask does not start at bit 0 or the sum of its
5032 // length and lsb exceeds the word's size.
5033 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5034 return SDValue();
5035
5036 NewOperand = FirstOperand.getOperand(0);
5037 } else {
5038 // Pattern match BSTRPICK.
5039    // $dst = and $src, (2**len - 1), if len > 12
5040 // => BSTRPICK $dst, $src, msb, lsb
5041 // where lsb = 0 and msb = len - 1
5042
5043 // If the mask is <= 0xfff, andi can be used instead.
5044 if (CN->getZExtValue() <= 0xfff)
5045 return SDValue();
5046
5047    // Return if the mask's MSB exceeds the value's bit width.
5048 if (SMIdx + SMLen > ValTy.getSizeInBits())
5049 return SDValue();
5050
5051 if (SMIdx > 0) {
5052      // Omit if the constant has more than 2 uses. This is a conservative
5053      // decision. Whether it is a win depends on the HW microarchitecture.
5054      // However, it should always be better for 1 and 2 uses.
5055 if (CN->use_size() > 2)
5056 return SDValue();
5057      // Return if the constant can be materialized with a single LU12I.W.
5058      if ((CN->getZExtValue() & 0xfff) == 0)
5059        return SDValue();
5060      // Return if the constant can be materialized with a single ADDI with
5061      // the zero register.
5062 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5063 return SDValue();
5064 }
5065
5066 lsb = SMIdx;
5067 NewOperand = FirstOperand;
5068 }
5069
5070 msb = lsb + SMLen - 1;
5071 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5072 DAG.getConstant(msb, DL, GRLenVT),
5073 DAG.getConstant(lsb, DL, GRLenVT));
5074 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5075 return NR0;
5076 // Try to optimize to
5077 // bstrpick $Rd, $Rs, msb, lsb
5078 // slli $Rd, $Rd, lsb
5079 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5080 DAG.getConstant(lsb, DL, GRLenVT));
5081}
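// Illustrative instances of the two patterns matched above (the constants are
// arbitrary, chosen only to satisfy the checks in the code):
//   and (srl $src, 8), 0xffff          ; SMIdx = 0, SMLen = 16, lsb = 8
//     => BSTRPICK $dst, $src, 23, 8    ; msb = lsb + len - 1 = 23
//   and $src, 0x7f800                  ; shifted mask, SMIdx = 11, SMLen = 8
//     => BSTRPICK $dst, $src, 18, 11
//        slli     $dst, $dst, 11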
5082
5085 const LoongArchSubtarget &Subtarget) {
5086 // BSTRPICK requires the 32S feature.
5087 if (!Subtarget.has32S())
5088 return SDValue();
5089
5090 if (DCI.isBeforeLegalizeOps())
5091 return SDValue();
5092
5093 // $dst = srl (and $src, Mask), Shamt
5094 // =>
5095 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5096 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5097 //
5098
5099 SDValue FirstOperand = N->getOperand(0);
5100 ConstantSDNode *CN;
5101 EVT ValTy = N->getValueType(0);
5102 SDLoc DL(N);
5103 MVT GRLenVT = Subtarget.getGRLenVT();
5104 unsigned MaskIdx, MaskLen;
5105 uint64_t Shamt;
5106
5107 // The first operand must be an AND and the second operand of the AND must be
5108 // a shifted mask.
5109 if (FirstOperand.getOpcode() != ISD::AND ||
5110 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5111 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5112 return SDValue();
5113
5114 // The second operand (shift amount) must be an immediate.
5115 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5116 return SDValue();
5117
5118 Shamt = CN->getZExtValue();
5119 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5120 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5121 FirstOperand->getOperand(0),
5122 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5123 DAG.getConstant(Shamt, DL, GRLenVT));
5124
5125 return SDValue();
5126}
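// For illustration (arbitrary constants): with Mask = 0x00ffff00 the code
// above finds MaskIdx = 8, MaskLen = 16, so
//   srl (and $src, 0x00ffff00), 8
//     => BSTRPICK $dst, $src, 23, 8    ; MaskIdx + MaskLen - 1 = 23, Shamt = 8
// Both forms extract bits 23..8 of $src into the low bits of $dst.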
5127
5128// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5129// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5130static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5131 unsigned Depth) {
5132  // Limit recursion.
5133  if (Depth >= SelectionDAG::MaxRecursionDepth)
5134    return false;
5135 switch (Src.getOpcode()) {
5136 case ISD::SETCC:
5137 case ISD::TRUNCATE:
5138 return Src.getOperand(0).getValueSizeInBits() == Size;
5139 case ISD::FREEZE:
5140 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5141 case ISD::AND:
5142 case ISD::XOR:
5143 case ISD::OR:
5144 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5145 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5146 case ISD::SELECT:
5147 case ISD::VSELECT:
5148 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5149 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5150 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5151 case ISD::BUILD_VECTOR:
5152 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5153 ISD::isBuildVectorAllOnes(Src.getNode());
5154 }
5155 return false;
5156}
5157
5158// Helper to push sign extension of vXi1 SETCC result through bitops.
5159static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5160                                          SDValue Src, const SDLoc &DL) {
5161 switch (Src.getOpcode()) {
5162 case ISD::SETCC:
5163 case ISD::FREEZE:
5164 case ISD::TRUNCATE:
5165 case ISD::BUILD_VECTOR:
5166 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5167 case ISD::AND:
5168 case ISD::XOR:
5169 case ISD::OR:
5170 return DAG.getNode(
5171 Src.getOpcode(), DL, SExtVT,
5172 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5173 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5174 case ISD::SELECT:
5175 case ISD::VSELECT:
5176 return DAG.getSelect(
5177 DL, SExtVT, Src.getOperand(0),
5178 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5179 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5180 }
5181 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5182}
5183
5184static SDValue
5185performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5186                            TargetLowering::DAGCombinerInfo &DCI,
5187                            const LoongArchSubtarget &Subtarget) {
5188 SDLoc DL(N);
5189 EVT VT = N->getValueType(0);
5190 SDValue Src = N->getOperand(0);
5191 EVT SrcVT = Src.getValueType();
5192
5193 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5194 return SDValue();
5195
5196 bool UseLASX;
5197 unsigned Opc = ISD::DELETED_NODE;
5198 EVT CmpVT = Src.getOperand(0).getValueType();
5199 EVT EltVT = CmpVT.getVectorElementType();
5200
5201 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5202 UseLASX = false;
5203 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5204 CmpVT.getSizeInBits() == 256)
5205 UseLASX = true;
5206 else
5207 return SDValue();
5208
5209 SDValue SrcN1 = Src.getOperand(1);
5210 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5211 default:
5212 break;
5213 case ISD::SETEQ:
5214 // x == 0 => not (vmsknez.b x)
5215 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5217 break;
5218 case ISD::SETGT:
5219 // x > -1 => vmskgez.b x
5220 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5222 break;
5223 case ISD::SETGE:
5224 // x >= 0 => vmskgez.b x
5225 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5227 break;
5228 case ISD::SETLT:
5229 // x < 0 => vmskltz.{b,h,w,d} x
5230 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5231 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5232 EltVT == MVT::i64))
5234 break;
5235 case ISD::SETLE:
5236 // x <= -1 => vmskltz.{b,h,w,d} x
5237 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5238 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5239 EltVT == MVT::i64))
5241 break;
5242 case ISD::SETNE:
5243 // x != 0 => vmsknez.b x
5244 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5246 break;
5247 }
5248
5249 if (Opc == ISD::DELETED_NODE)
5250 return SDValue();
5251
5252 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5254 V = DAG.getZExtOrTrunc(V, DL, T);
5255 return DAG.getBitcast(VT, V);
5256}
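// For illustration, assuming LSX and a v16i8 input X, the combine above turns
//   (i16 (bitcast (v16i1 (setcc X, <all zeros>, setlt))))
// into a single VMSKLTZ node, i.e. vmskltz.b collects the sign bit of every
// byte lane of X into a 16-bit mask in a general-purpose register, which is
// then truncated/bitcast to the i16 result.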
5257
5260 const LoongArchSubtarget &Subtarget) {
5261 SDLoc DL(N);
5262 EVT VT = N->getValueType(0);
5263 SDValue Src = N->getOperand(0);
5264 EVT SrcVT = Src.getValueType();
5265 MVT GRLenVT = Subtarget.getGRLenVT();
5266
5267 if (!DCI.isBeforeLegalizeOps())
5268 return SDValue();
5269
5270 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5271 return SDValue();
5272
5273 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5274 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5275 if (Res)
5276 return Res;
5277
5278 // Generate vXi1 using [X]VMSKLTZ
5279 MVT SExtVT;
5280 unsigned Opc;
5281 bool UseLASX = false;
5282 bool PropagateSExt = false;
5283
5284 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5285 EVT CmpVT = Src.getOperand(0).getValueType();
5286 if (CmpVT.getSizeInBits() > 256)
5287 return SDValue();
5288 }
5289
5290 switch (SrcVT.getSimpleVT().SimpleTy) {
5291 default:
5292 return SDValue();
5293 case MVT::v2i1:
5294 SExtVT = MVT::v2i64;
5295 break;
5296 case MVT::v4i1:
5297 SExtVT = MVT::v4i32;
5298 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5299 SExtVT = MVT::v4i64;
5300 UseLASX = true;
5301 PropagateSExt = true;
5302 }
5303 break;
5304 case MVT::v8i1:
5305 SExtVT = MVT::v8i16;
5306 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5307 SExtVT = MVT::v8i32;
5308 UseLASX = true;
5309 PropagateSExt = true;
5310 }
5311 break;
5312 case MVT::v16i1:
5313 SExtVT = MVT::v16i8;
5314 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5315 SExtVT = MVT::v16i16;
5316 UseLASX = true;
5317 PropagateSExt = true;
5318 }
5319 break;
5320 case MVT::v32i1:
5321 SExtVT = MVT::v32i8;
5322 UseLASX = true;
5323 break;
5324 };
5325 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5326 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5327
5328 SDValue V;
5329 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5330 if (Src.getSimpleValueType() == MVT::v32i8) {
5331 SDValue Lo, Hi;
5332 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5333 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5334 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5335 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5336 DAG.getConstant(16, DL, MVT::i8));
5337 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5338 } else if (UseLASX) {
5339 return SDValue();
5340 }
5341 }
5342
5343 if (!V) {
5345 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5346 }
5347
5349 V = DAG.getZExtOrTrunc(V, DL, T);
5350 return DAG.getBitcast(VT, V);
5351}
5352
5355 const LoongArchSubtarget &Subtarget) {
5356 MVT GRLenVT = Subtarget.getGRLenVT();
5357 EVT ValTy = N->getValueType(0);
5358 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5359 ConstantSDNode *CN0, *CN1;
5360 SDLoc DL(N);
5361 unsigned ValBits = ValTy.getSizeInBits();
5362 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5363 unsigned Shamt;
5364 bool SwapAndRetried = false;
5365
5366 // BSTRPICK requires the 32S feature.
5367 if (!Subtarget.has32S())
5368 return SDValue();
5369
5370 if (DCI.isBeforeLegalizeOps())
5371 return SDValue();
5372
5373 if (ValBits != 32 && ValBits != 64)
5374 return SDValue();
5375
5376Retry:
5377 // 1st pattern to match BSTRINS:
5378 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5379 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5380 // =>
5381 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5382 if (N0.getOpcode() == ISD::AND &&
5383 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5384 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5385 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5386 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5387 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5388 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5389 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5390 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5391 (MaskIdx0 + MaskLen0 <= ValBits)) {
5392 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5393 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5394 N1.getOperand(0).getOperand(0),
5395 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5396 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5397 }
5398
5399 // 2nd pattern to match BSTRINS:
5400 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5401 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5402 // =>
5403 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5404 if (N0.getOpcode() == ISD::AND &&
5405 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5406 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5407 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5408 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5409 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5410 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5411 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5412 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5413 (MaskIdx0 + MaskLen0 <= ValBits)) {
5414 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5415 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5416 N1.getOperand(0).getOperand(0),
5417 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5418 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5419 }
5420
5421 // 3rd pattern to match BSTRINS:
5422 // R = or (and X, mask0), (and Y, mask1)
5423 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5424 // =>
5425 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5426 // where msb = lsb + size - 1
5427 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5428 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5429 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5430 (MaskIdx0 + MaskLen0 <= 64) &&
5431 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5432 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5433 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5434 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5435 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5436 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5437 DAG.getConstant(ValBits == 32
5438 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5439 : (MaskIdx0 + MaskLen0 - 1),
5440 DL, GRLenVT),
5441 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5442 }
5443
5444 // 4th pattern to match BSTRINS:
5445 // R = or (and X, mask), (shl Y, shamt)
5446 // where mask = (2**shamt - 1)
5447 // =>
5448 // R = BSTRINS X, Y, ValBits - 1, shamt
5449 // where ValBits = 32 or 64
5450 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5451 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5452 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5453 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5454 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5455 (MaskIdx0 + MaskLen0 <= ValBits)) {
5456 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5457 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5458 N1.getOperand(0),
5459 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5460 DAG.getConstant(Shamt, DL, GRLenVT));
5461 }
5462
5463 // 5th pattern to match BSTRINS:
5464 // R = or (and X, mask), const
5465 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5466 // =>
5467 // R = BSTRINS X, (const >> lsb), msb, lsb
5468 // where msb = lsb + size - 1
5469 if (N0.getOpcode() == ISD::AND &&
5470 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5471 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5472 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5473 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5474 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5475 return DAG.getNode(
5476 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5477 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5478 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5479 : (MaskIdx0 + MaskLen0 - 1),
5480 DL, GRLenVT),
5481 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5482 }
5483
5484 // 6th pattern.
5485 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5486 // by the incoming bits are known to be zero.
5487 // =>
5488 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5489 //
5490  // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5491  // pattern is more common than the 1st. So we put the 1st before the 6th in
5492  // order to match as many nodes as possible.
5493 ConstantSDNode *CNMask, *CNShamt;
5494 unsigned MaskIdx, MaskLen;
5495 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5496 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5497 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5498 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5499 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5500 Shamt = CNShamt->getZExtValue();
5501 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5502 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5503 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5504 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5505 N1.getOperand(0).getOperand(0),
5506 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5507 DAG.getConstant(Shamt, DL, GRLenVT));
5508 }
5509 }
5510
5511 // 7th pattern.
5512 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5513 // overwritten by the incoming bits are known to be zero.
5514 // =>
5515 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5516 //
5517 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5518 // before the 7th in order to match as many nodes as possible.
5519 if (N1.getOpcode() == ISD::AND &&
5520 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5521 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5522 N1.getOperand(0).getOpcode() == ISD::SHL &&
5523 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5524 CNShamt->getZExtValue() == MaskIdx) {
5525 APInt ShMask(ValBits, CNMask->getZExtValue());
5526 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5527 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5528 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5529 N1.getOperand(0).getOperand(0),
5530 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5531 DAG.getConstant(MaskIdx, DL, GRLenVT));
5532 }
5533 }
5534
5535 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5536 if (!SwapAndRetried) {
5537 std::swap(N0, N1);
5538 SwapAndRetried = true;
5539 goto Retry;
5540 }
5541
5542 SwapAndRetried = false;
5543Retry2:
5544 // 8th pattern.
5545 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5546 // the incoming bits are known to be zero.
5547 // =>
5548 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5549 //
5550  // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5551  // we put it here in order to match as many nodes as possible or generate fewer
5552  // instructions.
5553 if (N1.getOpcode() == ISD::AND &&
5554 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5555 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5556 APInt ShMask(ValBits, CNMask->getZExtValue());
5557 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5558 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5559 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5560 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5561 N1->getOperand(0),
5562 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5563 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5564 DAG.getConstant(MaskIdx, DL, GRLenVT));
5565 }
5566 }
5567 // Swap N0/N1 and retry.
5568 if (!SwapAndRetried) {
5569 std::swap(N0, N1);
5570 SwapAndRetried = true;
5571 goto Retry2;
5572 }
5573
5574 return SDValue();
5575}
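// Concrete instance of the 1st BSTRINS pattern above (arbitrary 32-bit
// constants with lsb = 8 and size = 8, so mask1 = 0xff00, mask0 = 0xffff00ff):
//   or (and $a, 0xffff00ff), (and (shl $b, 8), 0xff00)
//     => BSTRINS $a, $b, 15, 8       ; msb = lsb + size - 1 = 15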
5576
5577static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5578 ExtType = ISD::NON_EXTLOAD;
5579
5580 switch (V.getNode()->getOpcode()) {
5581 case ISD::LOAD: {
5582 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5583 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5584 (LoadNode->getMemoryVT() == MVT::i16)) {
5585 ExtType = LoadNode->getExtensionType();
5586 return true;
5587 }
5588 return false;
5589 }
5590 case ISD::AssertSext: {
5591 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5592 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5593 ExtType = ISD::SEXTLOAD;
5594 return true;
5595 }
5596 return false;
5597 }
5598 case ISD::AssertZext: {
5599 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5600 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5601 ExtType = ISD::ZEXTLOAD;
5602 return true;
5603 }
5604 return false;
5605 }
5606 default:
5607 return false;
5608 }
5609
5610 return false;
5611}
5612
5613// Eliminate redundant truncation and zero-extension nodes.
5614// * Case 1:
5615// +------------+ +------------+ +------------+
5616// | Input1 | | Input2 | | CC |
5617// +------------+ +------------+ +------------+
5618// | | |
5619// V V +----+
5620// +------------+ +------------+ |
5621// | TRUNCATE | | TRUNCATE | |
5622// +------------+ +------------+ |
5623// | | |
5624// V V |
5625// +------------+ +------------+ |
5626// | ZERO_EXT | | ZERO_EXT | |
5627// +------------+ +------------+ |
5628// | | |
5629// | +-------------+ |
5630// V V | |
5631// +----------------+ | |
5632// | AND | | |
5633// +----------------+ | |
5634// | | |
5635// +---------------+ | |
5636// | | |
5637// V V V
5638// +-------------+
5639// | CMP |
5640// +-------------+
5641// * Case 2:
5642// +------------+ +------------+ +-------------+ +------------+ +------------+
5643// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5644// +------------+ +------------+ +-------------+ +------------+ +------------+
5645// | | | | |
5646// V | | | |
5647// +------------+ | | | |
5648// | XOR |<---------------------+ | |
5649// +------------+ | | |
5650// | | | |
5651// V V +---------------+ |
5652// +------------+ +------------+ | |
5653// | TRUNCATE | | TRUNCATE | | +-------------------------+
5654// +------------+ +------------+ | |
5655// | | | |
5656// V V | |
5657// +------------+ +------------+ | |
5658// | ZERO_EXT | | ZERO_EXT | | |
5659// +------------+ +------------+ | |
5660// | | | |
5661// V V | |
5662// +----------------+ | |
5663// | AND | | |
5664// +----------------+ | |
5665// | | |
5666// +---------------+ | |
5667// | | |
5668// V V V
5669// +-------------+
5670// | CMP |
5671// +-------------+
5674 const LoongArchSubtarget &Subtarget) {
5675 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5676
5677 SDNode *AndNode = N->getOperand(0).getNode();
5678 if (AndNode->getOpcode() != ISD::AND)
5679 return SDValue();
5680
5681 SDValue AndInputValue2 = AndNode->getOperand(1);
5682 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5683 return SDValue();
5684
5685 SDValue CmpInputValue = N->getOperand(1);
5686 SDValue AndInputValue1 = AndNode->getOperand(0);
5687 if (AndInputValue1.getOpcode() == ISD::XOR) {
5688 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5689 return SDValue();
5690 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5691 if (!CN || CN->getSExtValue() != -1)
5692 return SDValue();
5693 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5694 if (!CN || CN->getSExtValue() != 0)
5695 return SDValue();
5696 AndInputValue1 = AndInputValue1.getOperand(0);
5697 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5698 return SDValue();
5699 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5700 if (AndInputValue2 != CmpInputValue)
5701 return SDValue();
5702 } else {
5703 return SDValue();
5704 }
5705
5706 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5707 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5708 return SDValue();
5709
5710 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5711 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5712 return SDValue();
5713
5714 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5715 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5716 ISD::LoadExtType ExtType1;
5717 ISD::LoadExtType ExtType2;
5718
5719 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5720 !checkValueWidth(TruncInputValue2, ExtType2))
5721 return SDValue();
5722
5723 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5724 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5725 return SDValue();
5726
5727 if ((ExtType2 != ISD::ZEXTLOAD) &&
5728 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5729 return SDValue();
5730
5731  // These truncation and zero-extension nodes are not necessary; remove them.
5732 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5733 TruncInputValue1, TruncInputValue2);
5734 SDValue NewSetCC =
5735 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5736 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5737 return SDValue(N, 0);
5738}
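// For illustration (assuming i8 extending loads a and b), case 1 above turns
//   setcc (and (zext (trunc a)), (zext (trunc b))), (zext (trunc b)), cc
// into
//   setcc (and a, b), b, cc
// so the intermediate TRUNCATE/ZERO_EXTEND pairs disappear.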
5739
5740// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5743 const LoongArchSubtarget &Subtarget) {
5744 if (DCI.isBeforeLegalizeOps())
5745 return SDValue();
5746
5747 SDValue Src = N->getOperand(0);
5748 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5749 return SDValue();
5750
5751 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5752 Src.getOperand(0));
5753}
5754
5755// Perform common combines for BR_CC and SELECT_CC conditions.
5756static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5757 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5758 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5759
5760  // Since an arithmetic right shift always preserves the sign bit,
5761  // the shift can be omitted.
5762 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5763 // setge (sra X, N), 0 -> setge X, 0
5764 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5765 LHS.getOpcode() == ISD::SRA) {
5766 LHS = LHS.getOperand(0);
5767 return true;
5768 }
5769
5770 if (!ISD::isIntEqualitySetCC(CCVal))
5771 return false;
5772
5773 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5774 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5775 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5776 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5777 // If we're looking for eq 0 instead of ne 0, we need to invert the
5778 // condition.
5779 bool Invert = CCVal == ISD::SETEQ;
5780 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5781 if (Invert)
5782 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5783
5784 RHS = LHS.getOperand(1);
5785 LHS = LHS.getOperand(0);
5786 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5787
5788 CC = DAG.getCondCode(CCVal);
5789 return true;
5790 }
5791
5792 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
5793 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5794 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5795 SDValue LHS0 = LHS.getOperand(0);
5796 if (LHS0.getOpcode() == ISD::AND &&
5797 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5798 uint64_t Mask = LHS0.getConstantOperandVal(1);
5799 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5800 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5801 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5802 CC = DAG.getCondCode(CCVal);
5803
5804 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5805 LHS = LHS0.getOperand(0);
5806 if (ShAmt != 0)
5807 LHS =
5808 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5809 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5810 return true;
5811 }
5812 }
5813 }
5814
5815 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5816 // This can occur when legalizing some floating point comparisons.
5817 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5818 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5819 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5820 CC = DAG.getCondCode(CCVal);
5821 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5822 return true;
5823 }
5824
5825 return false;
5826}
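// Two small examples of the folds performed above, written in the
// (LHS, RHS, CC) notation used in the comments (shift amounts arbitrary):
//   ((sra X, 4), 0, lt)                ->  (X, 0, lt)
//   ((srl (and X, 1 << 5), 5), 0, eq)  ->  ((shl X, GRLen - 6), 0, ge)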
5827
5830 const LoongArchSubtarget &Subtarget) {
5831 SDValue LHS = N->getOperand(1);
5832 SDValue RHS = N->getOperand(2);
5833 SDValue CC = N->getOperand(3);
5834 SDLoc DL(N);
5835
5836 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5837 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5838 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5839
5840 return SDValue();
5841}
5842
5845 const LoongArchSubtarget &Subtarget) {
5846 // Transform
5847 SDValue LHS = N->getOperand(0);
5848 SDValue RHS = N->getOperand(1);
5849 SDValue CC = N->getOperand(2);
5850 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5851 SDValue TrueV = N->getOperand(3);
5852 SDValue FalseV = N->getOperand(4);
5853 SDLoc DL(N);
5854 EVT VT = N->getValueType(0);
5855
5856 // If the True and False values are the same, we don't need a select_cc.
5857 if (TrueV == FalseV)
5858 return TrueV;
5859
5860 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5861 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
5862 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5864 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5865 if (CCVal == ISD::CondCode::SETGE)
5866 std::swap(TrueV, FalseV);
5867
5868 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5869 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5870    // Only handle simm12; if a value is not in this range, it can be considered
5871    // a register operand.
5872 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5873 isInt<12>(TrueSImm - FalseSImm)) {
5874 SDValue SRA =
5875 DAG.getNode(ISD::SRA, DL, VT, LHS,
5876 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5877 SDValue AND =
5878 DAG.getNode(ISD::AND, DL, VT, SRA,
5879 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5880 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5881 }
5882
5883 if (CCVal == ISD::CondCode::SETGE)
5884 std::swap(TrueV, FalseV);
5885 }
5886
5887 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5888 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5889 {LHS, RHS, CC, TrueV, FalseV});
5890
5891 return SDValue();
5892}
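// Worked example of the constant-select lowering above, with GRLen = 32,
// TrueV = 5 and FalseV = 2 (arbitrary simm12 values):
//   (select (x < 0), 5, 2)
//     => ((x >> 31) & (5 - 2)) + 2
// For x < 0 the arithmetic shift yields all ones, giving 3 + 2 = 5;
// otherwise it gives 0 + 2 = 2.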
5893
5894template <unsigned N>
5895static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5896                                        SelectionDAG &DAG,
5897 const LoongArchSubtarget &Subtarget,
5898 bool IsSigned = false) {
5899 SDLoc DL(Node);
5900 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5901 // Check the ImmArg.
5902 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5903 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5904 DAG.getContext()->emitError(Node->getOperationName(0) +
5905 ": argument out of range.");
5906 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5907 }
5908 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5909}
5910
5911template <unsigned N>
5912static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5913 SelectionDAG &DAG, bool IsSigned = false) {
5914 SDLoc DL(Node);
5915 EVT ResTy = Node->getValueType(0);
5916 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5917
5918 // Check the ImmArg.
5919 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5920 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5921 DAG.getContext()->emitError(Node->getOperationName(0) +
5922 ": argument out of range.");
5923 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5924 }
5925  return DAG.getConstant(
5926      APInt(ResTy.getScalarType().getSizeInBits(),
5927      IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5928 DL, ResTy);
5929}
5930
5931static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5932  SDLoc DL(Node);
5933 EVT ResTy = Node->getValueType(0);
5934 SDValue Vec = Node->getOperand(2);
5935 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5936 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5937}
5938
5939static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5940  SDLoc DL(Node);
5941 EVT ResTy = Node->getValueType(0);
5942 SDValue One = DAG.getConstant(1, DL, ResTy);
5943 SDValue Bit =
5944 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5945
5946 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5947 DAG.getNOT(DL, Bit, ResTy));
5948}
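// A scalar sketch of the per-lane semantics produced above: each lane becomes
//   dst = src & ~(1 << (amt & (EltBits - 1)))
// e.g. for vbitclr.b the shift amount is taken modulo 8 via truncateVecElts.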
5949
5950template <unsigned N>
5951static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5952  SDLoc DL(Node);
5953 EVT ResTy = Node->getValueType(0);
5954 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5955 // Check the unsigned ImmArg.
5956 if (!isUInt<N>(CImm->getZExtValue())) {
5957 DAG.getContext()->emitError(Node->getOperationName(0) +
5958 ": argument out of range.");
5959 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5960 }
5961
5962 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5963 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5964
5965 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5966}
5967
5968template <unsigned N>
5969static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5970  SDLoc DL(Node);
5971 EVT ResTy = Node->getValueType(0);
5972 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5973 // Check the unsigned ImmArg.
5974 if (!isUInt<N>(CImm->getZExtValue())) {
5975 DAG.getContext()->emitError(Node->getOperationName(0) +
5976 ": argument out of range.");
5977 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5978 }
5979
5980 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5981 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5982 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5983}
5984
5985template <unsigned N>
5986static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5987  SDLoc DL(Node);
5988 EVT ResTy = Node->getValueType(0);
5989 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5990 // Check the unsigned ImmArg.
5991 if (!isUInt<N>(CImm->getZExtValue())) {
5992 DAG.getContext()->emitError(Node->getOperationName(0) +
5993 ": argument out of range.");
5994 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5995 }
5996
5997 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5998 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5999 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6000}
6001
6002template <unsigned W>
6004 unsigned ResOp) {
6005 unsigned Imm = N->getConstantOperandVal(2);
6006 if (!isUInt<W>(Imm)) {
6007 const StringRef ErrorMsg = "argument out of range";
6008 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6009 return DAG.getUNDEF(N->getValueType(0));
6010 }
6011 SDLoc DL(N);
6012 SDValue Vec = N->getOperand(1);
6013  SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6014  SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
6015  return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6016}
6017
6018static SDValue
6021 const LoongArchSubtarget &Subtarget) {
6022 SDLoc DL(N);
6023 switch (N->getConstantOperandVal(0)) {
6024 default:
6025 break;
6026 case Intrinsic::loongarch_lsx_vadd_b:
6027 case Intrinsic::loongarch_lsx_vadd_h:
6028 case Intrinsic::loongarch_lsx_vadd_w:
6029 case Intrinsic::loongarch_lsx_vadd_d:
6030 case Intrinsic::loongarch_lasx_xvadd_b:
6031 case Intrinsic::loongarch_lasx_xvadd_h:
6032 case Intrinsic::loongarch_lasx_xvadd_w:
6033 case Intrinsic::loongarch_lasx_xvadd_d:
6034 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6035 N->getOperand(2));
6036 case Intrinsic::loongarch_lsx_vaddi_bu:
6037 case Intrinsic::loongarch_lsx_vaddi_hu:
6038 case Intrinsic::loongarch_lsx_vaddi_wu:
6039 case Intrinsic::loongarch_lsx_vaddi_du:
6040 case Intrinsic::loongarch_lasx_xvaddi_bu:
6041 case Intrinsic::loongarch_lasx_xvaddi_hu:
6042 case Intrinsic::loongarch_lasx_xvaddi_wu:
6043 case Intrinsic::loongarch_lasx_xvaddi_du:
6044 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6045 lowerVectorSplatImm<5>(N, 2, DAG));
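  // For illustration: @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %v, i32 3) (the
  // immediate 3 is an arbitrary uimm5) is combined into a generic ISD::ADD of
  // %v with a splat of the immediate, so later combines and instruction
  // selection treat it like any other vector add.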
6046 case Intrinsic::loongarch_lsx_vsub_b:
6047 case Intrinsic::loongarch_lsx_vsub_h:
6048 case Intrinsic::loongarch_lsx_vsub_w:
6049 case Intrinsic::loongarch_lsx_vsub_d:
6050 case Intrinsic::loongarch_lasx_xvsub_b:
6051 case Intrinsic::loongarch_lasx_xvsub_h:
6052 case Intrinsic::loongarch_lasx_xvsub_w:
6053 case Intrinsic::loongarch_lasx_xvsub_d:
6054 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6055 N->getOperand(2));
6056 case Intrinsic::loongarch_lsx_vsubi_bu:
6057 case Intrinsic::loongarch_lsx_vsubi_hu:
6058 case Intrinsic::loongarch_lsx_vsubi_wu:
6059 case Intrinsic::loongarch_lsx_vsubi_du:
6060 case Intrinsic::loongarch_lasx_xvsubi_bu:
6061 case Intrinsic::loongarch_lasx_xvsubi_hu:
6062 case Intrinsic::loongarch_lasx_xvsubi_wu:
6063 case Intrinsic::loongarch_lasx_xvsubi_du:
6064 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6065 lowerVectorSplatImm<5>(N, 2, DAG));
6066 case Intrinsic::loongarch_lsx_vneg_b:
6067 case Intrinsic::loongarch_lsx_vneg_h:
6068 case Intrinsic::loongarch_lsx_vneg_w:
6069 case Intrinsic::loongarch_lsx_vneg_d:
6070 case Intrinsic::loongarch_lasx_xvneg_b:
6071 case Intrinsic::loongarch_lasx_xvneg_h:
6072 case Intrinsic::loongarch_lasx_xvneg_w:
6073 case Intrinsic::loongarch_lasx_xvneg_d:
6074 return DAG.getNode(
6075 ISD::SUB, DL, N->getValueType(0),
6076 DAG.getConstant(
6077 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6078 /*isSigned=*/true),
6079 SDLoc(N), N->getValueType(0)),
6080 N->getOperand(1));
6081 case Intrinsic::loongarch_lsx_vmax_b:
6082 case Intrinsic::loongarch_lsx_vmax_h:
6083 case Intrinsic::loongarch_lsx_vmax_w:
6084 case Intrinsic::loongarch_lsx_vmax_d:
6085 case Intrinsic::loongarch_lasx_xvmax_b:
6086 case Intrinsic::loongarch_lasx_xvmax_h:
6087 case Intrinsic::loongarch_lasx_xvmax_w:
6088 case Intrinsic::loongarch_lasx_xvmax_d:
6089 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6090 N->getOperand(2));
6091 case Intrinsic::loongarch_lsx_vmax_bu:
6092 case Intrinsic::loongarch_lsx_vmax_hu:
6093 case Intrinsic::loongarch_lsx_vmax_wu:
6094 case Intrinsic::loongarch_lsx_vmax_du:
6095 case Intrinsic::loongarch_lasx_xvmax_bu:
6096 case Intrinsic::loongarch_lasx_xvmax_hu:
6097 case Intrinsic::loongarch_lasx_xvmax_wu:
6098 case Intrinsic::loongarch_lasx_xvmax_du:
6099 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6100 N->getOperand(2));
6101 case Intrinsic::loongarch_lsx_vmaxi_b:
6102 case Intrinsic::loongarch_lsx_vmaxi_h:
6103 case Intrinsic::loongarch_lsx_vmaxi_w:
6104 case Intrinsic::loongarch_lsx_vmaxi_d:
6105 case Intrinsic::loongarch_lasx_xvmaxi_b:
6106 case Intrinsic::loongarch_lasx_xvmaxi_h:
6107 case Intrinsic::loongarch_lasx_xvmaxi_w:
6108 case Intrinsic::loongarch_lasx_xvmaxi_d:
6109 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6110 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6111 case Intrinsic::loongarch_lsx_vmaxi_bu:
6112 case Intrinsic::loongarch_lsx_vmaxi_hu:
6113 case Intrinsic::loongarch_lsx_vmaxi_wu:
6114 case Intrinsic::loongarch_lsx_vmaxi_du:
6115 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6116 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6117 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6118 case Intrinsic::loongarch_lasx_xvmaxi_du:
6119 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6120 lowerVectorSplatImm<5>(N, 2, DAG));
6121 case Intrinsic::loongarch_lsx_vmin_b:
6122 case Intrinsic::loongarch_lsx_vmin_h:
6123 case Intrinsic::loongarch_lsx_vmin_w:
6124 case Intrinsic::loongarch_lsx_vmin_d:
6125 case Intrinsic::loongarch_lasx_xvmin_b:
6126 case Intrinsic::loongarch_lasx_xvmin_h:
6127 case Intrinsic::loongarch_lasx_xvmin_w:
6128 case Intrinsic::loongarch_lasx_xvmin_d:
6129 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6130 N->getOperand(2));
6131 case Intrinsic::loongarch_lsx_vmin_bu:
6132 case Intrinsic::loongarch_lsx_vmin_hu:
6133 case Intrinsic::loongarch_lsx_vmin_wu:
6134 case Intrinsic::loongarch_lsx_vmin_du:
6135 case Intrinsic::loongarch_lasx_xvmin_bu:
6136 case Intrinsic::loongarch_lasx_xvmin_hu:
6137 case Intrinsic::loongarch_lasx_xvmin_wu:
6138 case Intrinsic::loongarch_lasx_xvmin_du:
6139 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6140 N->getOperand(2));
6141 case Intrinsic::loongarch_lsx_vmini_b:
6142 case Intrinsic::loongarch_lsx_vmini_h:
6143 case Intrinsic::loongarch_lsx_vmini_w:
6144 case Intrinsic::loongarch_lsx_vmini_d:
6145 case Intrinsic::loongarch_lasx_xvmini_b:
6146 case Intrinsic::loongarch_lasx_xvmini_h:
6147 case Intrinsic::loongarch_lasx_xvmini_w:
6148 case Intrinsic::loongarch_lasx_xvmini_d:
6149 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6150 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6151 case Intrinsic::loongarch_lsx_vmini_bu:
6152 case Intrinsic::loongarch_lsx_vmini_hu:
6153 case Intrinsic::loongarch_lsx_vmini_wu:
6154 case Intrinsic::loongarch_lsx_vmini_du:
6155 case Intrinsic::loongarch_lasx_xvmini_bu:
6156 case Intrinsic::loongarch_lasx_xvmini_hu:
6157 case Intrinsic::loongarch_lasx_xvmini_wu:
6158 case Intrinsic::loongarch_lasx_xvmini_du:
6159 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6160 lowerVectorSplatImm<5>(N, 2, DAG));
6161 case Intrinsic::loongarch_lsx_vmul_b:
6162 case Intrinsic::loongarch_lsx_vmul_h:
6163 case Intrinsic::loongarch_lsx_vmul_w:
6164 case Intrinsic::loongarch_lsx_vmul_d:
6165 case Intrinsic::loongarch_lasx_xvmul_b:
6166 case Intrinsic::loongarch_lasx_xvmul_h:
6167 case Intrinsic::loongarch_lasx_xvmul_w:
6168 case Intrinsic::loongarch_lasx_xvmul_d:
6169 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6170 N->getOperand(2));
6171 case Intrinsic::loongarch_lsx_vmadd_b:
6172 case Intrinsic::loongarch_lsx_vmadd_h:
6173 case Intrinsic::loongarch_lsx_vmadd_w:
6174 case Intrinsic::loongarch_lsx_vmadd_d:
6175 case Intrinsic::loongarch_lasx_xvmadd_b:
6176 case Intrinsic::loongarch_lasx_xvmadd_h:
6177 case Intrinsic::loongarch_lasx_xvmadd_w:
6178 case Intrinsic::loongarch_lasx_xvmadd_d: {
6179 EVT ResTy = N->getValueType(0);
6180 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6181 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6182 N->getOperand(3)));
6183 }
6184 case Intrinsic::loongarch_lsx_vmsub_b:
6185 case Intrinsic::loongarch_lsx_vmsub_h:
6186 case Intrinsic::loongarch_lsx_vmsub_w:
6187 case Intrinsic::loongarch_lsx_vmsub_d:
6188 case Intrinsic::loongarch_lasx_xvmsub_b:
6189 case Intrinsic::loongarch_lasx_xvmsub_h:
6190 case Intrinsic::loongarch_lasx_xvmsub_w:
6191 case Intrinsic::loongarch_lasx_xvmsub_d: {
6192 EVT ResTy = N->getValueType(0);
6193 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6194 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6195 N->getOperand(3)));
6196 }
6197 case Intrinsic::loongarch_lsx_vdiv_b:
6198 case Intrinsic::loongarch_lsx_vdiv_h:
6199 case Intrinsic::loongarch_lsx_vdiv_w:
6200 case Intrinsic::loongarch_lsx_vdiv_d:
6201 case Intrinsic::loongarch_lasx_xvdiv_b:
6202 case Intrinsic::loongarch_lasx_xvdiv_h:
6203 case Intrinsic::loongarch_lasx_xvdiv_w:
6204 case Intrinsic::loongarch_lasx_xvdiv_d:
6205 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6206 N->getOperand(2));
6207 case Intrinsic::loongarch_lsx_vdiv_bu:
6208 case Intrinsic::loongarch_lsx_vdiv_hu:
6209 case Intrinsic::loongarch_lsx_vdiv_wu:
6210 case Intrinsic::loongarch_lsx_vdiv_du:
6211 case Intrinsic::loongarch_lasx_xvdiv_bu:
6212 case Intrinsic::loongarch_lasx_xvdiv_hu:
6213 case Intrinsic::loongarch_lasx_xvdiv_wu:
6214 case Intrinsic::loongarch_lasx_xvdiv_du:
6215 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6216 N->getOperand(2));
6217 case Intrinsic::loongarch_lsx_vmod_b:
6218 case Intrinsic::loongarch_lsx_vmod_h:
6219 case Intrinsic::loongarch_lsx_vmod_w:
6220 case Intrinsic::loongarch_lsx_vmod_d:
6221 case Intrinsic::loongarch_lasx_xvmod_b:
6222 case Intrinsic::loongarch_lasx_xvmod_h:
6223 case Intrinsic::loongarch_lasx_xvmod_w:
6224 case Intrinsic::loongarch_lasx_xvmod_d:
6225 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6226 N->getOperand(2));
6227 case Intrinsic::loongarch_lsx_vmod_bu:
6228 case Intrinsic::loongarch_lsx_vmod_hu:
6229 case Intrinsic::loongarch_lsx_vmod_wu:
6230 case Intrinsic::loongarch_lsx_vmod_du:
6231 case Intrinsic::loongarch_lasx_xvmod_bu:
6232 case Intrinsic::loongarch_lasx_xvmod_hu:
6233 case Intrinsic::loongarch_lasx_xvmod_wu:
6234 case Intrinsic::loongarch_lasx_xvmod_du:
6235 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6236 N->getOperand(2));
6237 case Intrinsic::loongarch_lsx_vand_v:
6238 case Intrinsic::loongarch_lasx_xvand_v:
6239 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6240 N->getOperand(2));
6241 case Intrinsic::loongarch_lsx_vor_v:
6242 case Intrinsic::loongarch_lasx_xvor_v:
6243 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6244 N->getOperand(2));
6245 case Intrinsic::loongarch_lsx_vxor_v:
6246 case Intrinsic::loongarch_lasx_xvxor_v:
6247 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6248 N->getOperand(2));
6249 case Intrinsic::loongarch_lsx_vnor_v:
6250 case Intrinsic::loongarch_lasx_xvnor_v: {
6251 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6252 N->getOperand(2));
6253 return DAG.getNOT(DL, Res, Res->getValueType(0));
6254 }
6255 case Intrinsic::loongarch_lsx_vandi_b:
6256 case Intrinsic::loongarch_lasx_xvandi_b:
6257 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6258 lowerVectorSplatImm<8>(N, 2, DAG));
6259 case Intrinsic::loongarch_lsx_vori_b:
6260 case Intrinsic::loongarch_lasx_xvori_b:
6261 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6262 lowerVectorSplatImm<8>(N, 2, DAG));
6263 case Intrinsic::loongarch_lsx_vxori_b:
6264 case Intrinsic::loongarch_lasx_xvxori_b:
6265 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6266 lowerVectorSplatImm<8>(N, 2, DAG));
6267 case Intrinsic::loongarch_lsx_vsll_b:
6268 case Intrinsic::loongarch_lsx_vsll_h:
6269 case Intrinsic::loongarch_lsx_vsll_w:
6270 case Intrinsic::loongarch_lsx_vsll_d:
6271 case Intrinsic::loongarch_lasx_xvsll_b:
6272 case Intrinsic::loongarch_lasx_xvsll_h:
6273 case Intrinsic::loongarch_lasx_xvsll_w:
6274 case Intrinsic::loongarch_lasx_xvsll_d:
6275 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6276 truncateVecElts(N, DAG));
6277 case Intrinsic::loongarch_lsx_vslli_b:
6278 case Intrinsic::loongarch_lasx_xvslli_b:
6279 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6280 lowerVectorSplatImm<3>(N, 2, DAG));
6281 case Intrinsic::loongarch_lsx_vslli_h:
6282 case Intrinsic::loongarch_lasx_xvslli_h:
6283 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6284 lowerVectorSplatImm<4>(N, 2, DAG));
6285 case Intrinsic::loongarch_lsx_vslli_w:
6286 case Intrinsic::loongarch_lasx_xvslli_w:
6287 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6288 lowerVectorSplatImm<5>(N, 2, DAG));
6289 case Intrinsic::loongarch_lsx_vslli_d:
6290 case Intrinsic::loongarch_lasx_xvslli_d:
6291 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6292 lowerVectorSplatImm<6>(N, 2, DAG));
6293 case Intrinsic::loongarch_lsx_vsrl_b:
6294 case Intrinsic::loongarch_lsx_vsrl_h:
6295 case Intrinsic::loongarch_lsx_vsrl_w:
6296 case Intrinsic::loongarch_lsx_vsrl_d:
6297 case Intrinsic::loongarch_lasx_xvsrl_b:
6298 case Intrinsic::loongarch_lasx_xvsrl_h:
6299 case Intrinsic::loongarch_lasx_xvsrl_w:
6300 case Intrinsic::loongarch_lasx_xvsrl_d:
6301 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6302 truncateVecElts(N, DAG));
6303 case Intrinsic::loongarch_lsx_vsrli_b:
6304 case Intrinsic::loongarch_lasx_xvsrli_b:
6305 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6306 lowerVectorSplatImm<3>(N, 2, DAG));
6307 case Intrinsic::loongarch_lsx_vsrli_h:
6308 case Intrinsic::loongarch_lasx_xvsrli_h:
6309 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6310 lowerVectorSplatImm<4>(N, 2, DAG));
6311 case Intrinsic::loongarch_lsx_vsrli_w:
6312 case Intrinsic::loongarch_lasx_xvsrli_w:
6313 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6314 lowerVectorSplatImm<5>(N, 2, DAG));
6315 case Intrinsic::loongarch_lsx_vsrli_d:
6316 case Intrinsic::loongarch_lasx_xvsrli_d:
6317 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6318 lowerVectorSplatImm<6>(N, 2, DAG));
6319 case Intrinsic::loongarch_lsx_vsra_b:
6320 case Intrinsic::loongarch_lsx_vsra_h:
6321 case Intrinsic::loongarch_lsx_vsra_w:
6322 case Intrinsic::loongarch_lsx_vsra_d:
6323 case Intrinsic::loongarch_lasx_xvsra_b:
6324 case Intrinsic::loongarch_lasx_xvsra_h:
6325 case Intrinsic::loongarch_lasx_xvsra_w:
6326 case Intrinsic::loongarch_lasx_xvsra_d:
6327 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6328 truncateVecElts(N, DAG));
6329 case Intrinsic::loongarch_lsx_vsrai_b:
6330 case Intrinsic::loongarch_lasx_xvsrai_b:
6331 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6332 lowerVectorSplatImm<3>(N, 2, DAG));
6333 case Intrinsic::loongarch_lsx_vsrai_h:
6334 case Intrinsic::loongarch_lasx_xvsrai_h:
6335 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6336 lowerVectorSplatImm<4>(N, 2, DAG));
6337 case Intrinsic::loongarch_lsx_vsrai_w:
6338 case Intrinsic::loongarch_lasx_xvsrai_w:
6339 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6340 lowerVectorSplatImm<5>(N, 2, DAG));
6341 case Intrinsic::loongarch_lsx_vsrai_d:
6342 case Intrinsic::loongarch_lasx_xvsrai_d:
6343 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6344 lowerVectorSplatImm<6>(N, 2, DAG));
6345 case Intrinsic::loongarch_lsx_vclz_b:
6346 case Intrinsic::loongarch_lsx_vclz_h:
6347 case Intrinsic::loongarch_lsx_vclz_w:
6348 case Intrinsic::loongarch_lsx_vclz_d:
6349 case Intrinsic::loongarch_lasx_xvclz_b:
6350 case Intrinsic::loongarch_lasx_xvclz_h:
6351 case Intrinsic::loongarch_lasx_xvclz_w:
6352 case Intrinsic::loongarch_lasx_xvclz_d:
6353 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6354 case Intrinsic::loongarch_lsx_vpcnt_b:
6355 case Intrinsic::loongarch_lsx_vpcnt_h:
6356 case Intrinsic::loongarch_lsx_vpcnt_w:
6357 case Intrinsic::loongarch_lsx_vpcnt_d:
6358 case Intrinsic::loongarch_lasx_xvpcnt_b:
6359 case Intrinsic::loongarch_lasx_xvpcnt_h:
6360 case Intrinsic::loongarch_lasx_xvpcnt_w:
6361 case Intrinsic::loongarch_lasx_xvpcnt_d:
6362 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6363 case Intrinsic::loongarch_lsx_vbitclr_b:
6364 case Intrinsic::loongarch_lsx_vbitclr_h:
6365 case Intrinsic::loongarch_lsx_vbitclr_w:
6366 case Intrinsic::loongarch_lsx_vbitclr_d:
6367 case Intrinsic::loongarch_lasx_xvbitclr_b:
6368 case Intrinsic::loongarch_lasx_xvbitclr_h:
6369 case Intrinsic::loongarch_lasx_xvbitclr_w:
6370 case Intrinsic::loongarch_lasx_xvbitclr_d:
6371 return lowerVectorBitClear(N, DAG);
6372 case Intrinsic::loongarch_lsx_vbitclri_b:
6373 case Intrinsic::loongarch_lasx_xvbitclri_b:
6374 return lowerVectorBitClearImm<3>(N, DAG);
6375 case Intrinsic::loongarch_lsx_vbitclri_h:
6376 case Intrinsic::loongarch_lasx_xvbitclri_h:
6377 return lowerVectorBitClearImm<4>(N, DAG);
6378 case Intrinsic::loongarch_lsx_vbitclri_w:
6379 case Intrinsic::loongarch_lasx_xvbitclri_w:
6380 return lowerVectorBitClearImm<5>(N, DAG);
6381 case Intrinsic::loongarch_lsx_vbitclri_d:
6382 case Intrinsic::loongarch_lasx_xvbitclri_d:
6383 return lowerVectorBitClearImm<6>(N, DAG);
6384 case Intrinsic::loongarch_lsx_vbitset_b:
6385 case Intrinsic::loongarch_lsx_vbitset_h:
6386 case Intrinsic::loongarch_lsx_vbitset_w:
6387 case Intrinsic::loongarch_lsx_vbitset_d:
6388 case Intrinsic::loongarch_lasx_xvbitset_b:
6389 case Intrinsic::loongarch_lasx_xvbitset_h:
6390 case Intrinsic::loongarch_lasx_xvbitset_w:
6391 case Intrinsic::loongarch_lasx_xvbitset_d: {
6392 EVT VecTy = N->getValueType(0);
6393 SDValue One = DAG.getConstant(1, DL, VecTy);
6394 return DAG.getNode(
6395 ISD::OR, DL, VecTy, N->getOperand(1),
6396 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6397 }
6398 case Intrinsic::loongarch_lsx_vbitseti_b:
6399 case Intrinsic::loongarch_lasx_xvbitseti_b:
6400 return lowerVectorBitSetImm<3>(N, DAG);
6401 case Intrinsic::loongarch_lsx_vbitseti_h:
6402 case Intrinsic::loongarch_lasx_xvbitseti_h:
6403 return lowerVectorBitSetImm<4>(N, DAG);
6404 case Intrinsic::loongarch_lsx_vbitseti_w:
6405 case Intrinsic::loongarch_lasx_xvbitseti_w:
6406 return lowerVectorBitSetImm<5>(N, DAG);
6407 case Intrinsic::loongarch_lsx_vbitseti_d:
6408 case Intrinsic::loongarch_lasx_xvbitseti_d:
6409 return lowerVectorBitSetImm<6>(N, DAG);
6410 case Intrinsic::loongarch_lsx_vbitrev_b:
6411 case Intrinsic::loongarch_lsx_vbitrev_h:
6412 case Intrinsic::loongarch_lsx_vbitrev_w:
6413 case Intrinsic::loongarch_lsx_vbitrev_d:
6414 case Intrinsic::loongarch_lasx_xvbitrev_b:
6415 case Intrinsic::loongarch_lasx_xvbitrev_h:
6416 case Intrinsic::loongarch_lasx_xvbitrev_w:
6417 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6418 EVT VecTy = N->getValueType(0);
6419 SDValue One = DAG.getConstant(1, DL, VecTy);
6420 return DAG.getNode(
6421 ISD::XOR, DL, VecTy, N->getOperand(1),
6422 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6423 }
6424 case Intrinsic::loongarch_lsx_vbitrevi_b:
6425 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6426 return lowerVectorBitRevImm<3>(N, DAG);
6427 case Intrinsic::loongarch_lsx_vbitrevi_h:
6428 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6429 return lowerVectorBitRevImm<4>(N, DAG);
6430 case Intrinsic::loongarch_lsx_vbitrevi_w:
6431 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6432 return lowerVectorBitRevImm<5>(N, DAG);
6433 case Intrinsic::loongarch_lsx_vbitrevi_d:
6434 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6435 return lowerVectorBitRevImm<6>(N, DAG);
6436 case Intrinsic::loongarch_lsx_vfadd_s:
6437 case Intrinsic::loongarch_lsx_vfadd_d:
6438 case Intrinsic::loongarch_lasx_xvfadd_s:
6439 case Intrinsic::loongarch_lasx_xvfadd_d:
6440 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6441 N->getOperand(2));
6442 case Intrinsic::loongarch_lsx_vfsub_s:
6443 case Intrinsic::loongarch_lsx_vfsub_d:
6444 case Intrinsic::loongarch_lasx_xvfsub_s:
6445 case Intrinsic::loongarch_lasx_xvfsub_d:
6446 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6447 N->getOperand(2));
6448 case Intrinsic::loongarch_lsx_vfmul_s:
6449 case Intrinsic::loongarch_lsx_vfmul_d:
6450 case Intrinsic::loongarch_lasx_xvfmul_s:
6451 case Intrinsic::loongarch_lasx_xvfmul_d:
6452 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6453 N->getOperand(2));
6454 case Intrinsic::loongarch_lsx_vfdiv_s:
6455 case Intrinsic::loongarch_lsx_vfdiv_d:
6456 case Intrinsic::loongarch_lasx_xvfdiv_s:
6457 case Intrinsic::loongarch_lasx_xvfdiv_d:
6458 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6459 N->getOperand(2));
6460 case Intrinsic::loongarch_lsx_vfmadd_s:
6461 case Intrinsic::loongarch_lsx_vfmadd_d:
6462 case Intrinsic::loongarch_lasx_xvfmadd_s:
6463 case Intrinsic::loongarch_lasx_xvfmadd_d:
6464 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6465 N->getOperand(2), N->getOperand(3));
6466 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6467 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6468 N->getOperand(1), N->getOperand(2),
6469 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6470 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6471 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6472 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6473 N->getOperand(1), N->getOperand(2),
6474 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6475 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6476 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6477 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6478 N->getOperand(1), N->getOperand(2),
6479 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6480 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6481 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6482 N->getOperand(1), N->getOperand(2),
6483 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6484 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6485 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6486 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6487 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6488 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6489 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6490 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6491 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6492 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6493 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6494 N->getOperand(1)));
6495 case Intrinsic::loongarch_lsx_vreplve_b:
6496 case Intrinsic::loongarch_lsx_vreplve_h:
6497 case Intrinsic::loongarch_lsx_vreplve_w:
6498 case Intrinsic::loongarch_lsx_vreplve_d:
6499 case Intrinsic::loongarch_lasx_xvreplve_b:
6500 case Intrinsic::loongarch_lasx_xvreplve_h:
6501 case Intrinsic::loongarch_lasx_xvreplve_w:
6502 case Intrinsic::loongarch_lasx_xvreplve_d:
6503 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6504 N->getOperand(1),
6505 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6506 N->getOperand(2)));
6507 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6508 if (!Subtarget.is64Bit())
6510 break;
6511 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6512 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6513 if (!Subtarget.is64Bit())
6515 break;
6516 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6517 if (!Subtarget.is64Bit())
6519 break;
6520 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6521 if (!Subtarget.is64Bit())
6523 break;
6524 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6525 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6526 if (!Subtarget.is64Bit())
6528 break;
6529 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6530 if (!Subtarget.is64Bit())
6532 break;
6533 case Intrinsic::loongarch_lsx_bz_b:
6534 case Intrinsic::loongarch_lsx_bz_h:
6535 case Intrinsic::loongarch_lsx_bz_w:
6536 case Intrinsic::loongarch_lsx_bz_d:
6537 case Intrinsic::loongarch_lasx_xbz_b:
6538 case Intrinsic::loongarch_lasx_xbz_h:
6539 case Intrinsic::loongarch_lasx_xbz_w:
6540 case Intrinsic::loongarch_lasx_xbz_d:
6541 if (!Subtarget.is64Bit())
6542 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6543 N->getOperand(1));
6544 break;
6545 case Intrinsic::loongarch_lsx_bz_v:
6546 case Intrinsic::loongarch_lasx_xbz_v:
6547 if (!Subtarget.is64Bit())
6548 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6549 N->getOperand(1));
6550 break;
6551 case Intrinsic::loongarch_lsx_bnz_b:
6552 case Intrinsic::loongarch_lsx_bnz_h:
6553 case Intrinsic::loongarch_lsx_bnz_w:
6554 case Intrinsic::loongarch_lsx_bnz_d:
6555 case Intrinsic::loongarch_lasx_xbnz_b:
6556 case Intrinsic::loongarch_lasx_xbnz_h:
6557 case Intrinsic::loongarch_lasx_xbnz_w:
6558 case Intrinsic::loongarch_lasx_xbnz_d:
6559 if (!Subtarget.is64Bit())
6560 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6561 N->getOperand(1));
6562 break;
6563 case Intrinsic::loongarch_lsx_bnz_v:
6564 case Intrinsic::loongarch_lasx_xbnz_v:
6565 if (!Subtarget.is64Bit())
6566 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6567 N->getOperand(1));
6568 break;
6569 }
6570 return SDValue();
6571}
6572
6575 const LoongArchSubtarget &Subtarget) {
6576 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6577 // conversion is unnecessary and can be replaced with the
6578 // MOVFR2GR_S_LA64 operand.
6579 SDValue Op0 = N->getOperand(0);
6581 return Op0.getOperand(0);
6582 return SDValue();
6583}
6584
6587 const LoongArchSubtarget &Subtarget) {
6588 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6589 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6590 // operand.
6591 SDValue Op0 = N->getOperand(0);
6593 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6594 "Unexpected value type!");
6595 return Op0.getOperand(0);
6596 }
6597 return SDValue();
6598}
6599
6602 const LoongArchSubtarget &Subtarget) {
6603 MVT VT = N->getSimpleValueType(0);
6604 unsigned NumBits = VT.getScalarSizeInBits();
6605
6606 // Simplify the inputs.
6607 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6608 APInt DemandedMask(APInt::getAllOnes(NumBits));
6609 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6610 return SDValue(N, 0);
6611
6612 return SDValue();
6613}
6614
6615static SDValue
6618 const LoongArchSubtarget &Subtarget) {
6619 SDValue Op0 = N->getOperand(0);
6620 SDLoc DL(N);
6621
6622 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6623 // redundant. Instead, use BuildPairF64's operands directly.
6625 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6626
6627 if (Op0->isUndef()) {
6628 SDValue Lo = DAG.getUNDEF(MVT::i32);
6629 SDValue Hi = DAG.getUNDEF(MVT::i32);
6630 return DCI.CombineTo(N, Lo, Hi);
6631 }
6632
6633 // It's cheaper to materialise two 32-bit integers than to load a double
6634 // from the constant pool and transfer it to integer registers through the
6635 // stack.
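  // For example, the f64 constant 1.0 has the bit pattern 0x3FF0000000000000,
  // so Lo becomes 0x00000000 and Hi becomes 0x3FF00000, each of which can be
  // materialized cheaply in a GPR.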
6637 APInt V = C->getValueAPF().bitcastToAPInt();
6638 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6639 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6640 return DCI.CombineTo(N, Lo, Hi);
6641 }
6642
6643 return SDValue();
6644}
6645
6646static SDValue
6649 const LoongArchSubtarget &Subtarget) {
6650 if (!DCI.isBeforeLegalize())
6651 return SDValue();
6652
6653 MVT EltVT = N->getSimpleValueType(0);
6654 SDValue Vec = N->getOperand(0);
6655 EVT VecTy = Vec->getValueType(0);
6656 SDValue Idx = N->getOperand(1);
6657 unsigned IdxOp = Idx.getOpcode();
6658 SDLoc DL(N);
6659
6660 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6661 return SDValue();
6662
6663 // Combine:
6664 // t2 = truncate t1
6665 // t3 = {zero/sign/any}_extend t2
6666 // t4 = extract_vector_elt t0, t3
6667 // to:
6668 // t4 = extract_vector_elt t0, t1
6669 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6670 IdxOp == ISD::ANY_EXTEND) {
6671 SDValue IdxOrig = Idx.getOperand(0);
6672     if (IdxOrig.getOpcode() != ISD::TRUNCATE)
6673 return SDValue();
6674
6675 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6676 IdxOrig.getOperand(0));
6677 }
6678
6679 return SDValue();
6680}
6681
6683 DAGCombinerInfo &DCI) const {
6684 SelectionDAG &DAG = DCI.DAG;
6685 switch (N->getOpcode()) {
6686 default:
6687 break;
6688 case ISD::AND:
6689 return performANDCombine(N, DAG, DCI, Subtarget);
6690 case ISD::OR:
6691 return performORCombine(N, DAG, DCI, Subtarget);
6692 case ISD::SETCC:
6693 return performSETCCCombine(N, DAG, DCI, Subtarget);
6694 case ISD::SRL:
6695 return performSRLCombine(N, DAG, DCI, Subtarget);
6696 case ISD::BITCAST:
6697 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6699 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6701 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6703 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6705 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6707 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6709 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6712 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6714 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6716 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6717 }
6718 return SDValue();
6719}
6720
6723 if (!ZeroDivCheck)
6724 return MBB;
6725
6726 // Build instructions:
6727 // MBB:
6728 // div(or mod) $dst, $dividend, $divisor
6729 // bne $divisor, $zero, SinkMBB
6730 // BreakMBB:
6731 // break 7 // BRK_DIVZERO
6732 // SinkMBB:
6733 // fallthrough
6734 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6735 MachineFunction::iterator It = ++MBB->getIterator();
6736 MachineFunction *MF = MBB->getParent();
6737 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6738 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6739 MF->insert(It, BreakMBB);
6740 MF->insert(It, SinkMBB);
6741
6742 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6743 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6744 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6745
6746 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6747 DebugLoc DL = MI.getDebugLoc();
6748 MachineOperand &Divisor = MI.getOperand(2);
6749 Register DivisorReg = Divisor.getReg();
6750
6751 // MBB:
6752 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6753 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6754 .addReg(LoongArch::R0)
6755 .addMBB(SinkMBB);
6756 MBB->addSuccessor(BreakMBB);
6757 MBB->addSuccessor(SinkMBB);
6758
6759 // BreakMBB:
6760 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6761 // definition of BRK_DIVZERO.
6762 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6763 BreakMBB->addSuccessor(SinkMBB);
6764
6765 // Clear Divisor's kill flag.
6766 Divisor.setIsKill(false);
6767
6768 return SinkMBB;
6769}
6770
6771static MachineBasicBlock *
6773 const LoongArchSubtarget &Subtarget) {
6774 unsigned CondOpc;
6775 switch (MI.getOpcode()) {
6776 default:
6777 llvm_unreachable("Unexpected opcode");
6778 case LoongArch::PseudoVBZ:
6779 CondOpc = LoongArch::VSETEQZ_V;
6780 break;
6781 case LoongArch::PseudoVBZ_B:
6782 CondOpc = LoongArch::VSETANYEQZ_B;
6783 break;
6784 case LoongArch::PseudoVBZ_H:
6785 CondOpc = LoongArch::VSETANYEQZ_H;
6786 break;
6787 case LoongArch::PseudoVBZ_W:
6788 CondOpc = LoongArch::VSETANYEQZ_W;
6789 break;
6790 case LoongArch::PseudoVBZ_D:
6791 CondOpc = LoongArch::VSETANYEQZ_D;
6792 break;
6793 case LoongArch::PseudoVBNZ:
6794 CondOpc = LoongArch::VSETNEZ_V;
6795 break;
6796 case LoongArch::PseudoVBNZ_B:
6797 CondOpc = LoongArch::VSETALLNEZ_B;
6798 break;
6799 case LoongArch::PseudoVBNZ_H:
6800 CondOpc = LoongArch::VSETALLNEZ_H;
6801 break;
6802 case LoongArch::PseudoVBNZ_W:
6803 CondOpc = LoongArch::VSETALLNEZ_W;
6804 break;
6805 case LoongArch::PseudoVBNZ_D:
6806 CondOpc = LoongArch::VSETALLNEZ_D;
6807 break;
6808 case LoongArch::PseudoXVBZ:
6809 CondOpc = LoongArch::XVSETEQZ_V;
6810 break;
6811 case LoongArch::PseudoXVBZ_B:
6812 CondOpc = LoongArch::XVSETANYEQZ_B;
6813 break;
6814 case LoongArch::PseudoXVBZ_H:
6815 CondOpc = LoongArch::XVSETANYEQZ_H;
6816 break;
6817 case LoongArch::PseudoXVBZ_W:
6818 CondOpc = LoongArch::XVSETANYEQZ_W;
6819 break;
6820 case LoongArch::PseudoXVBZ_D:
6821 CondOpc = LoongArch::XVSETANYEQZ_D;
6822 break;
6823 case LoongArch::PseudoXVBNZ:
6824 CondOpc = LoongArch::XVSETNEZ_V;
6825 break;
6826 case LoongArch::PseudoXVBNZ_B:
6827 CondOpc = LoongArch::XVSETALLNEZ_B;
6828 break;
6829 case LoongArch::PseudoXVBNZ_H:
6830 CondOpc = LoongArch::XVSETALLNEZ_H;
6831 break;
6832 case LoongArch::PseudoXVBNZ_W:
6833 CondOpc = LoongArch::XVSETALLNEZ_W;
6834 break;
6835 case LoongArch::PseudoXVBNZ_D:
6836 CondOpc = LoongArch::XVSETALLNEZ_D;
6837 break;
6838 }
6839
6840 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6841 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6842 DebugLoc DL = MI.getDebugLoc();
6845
6846 MachineFunction *F = BB->getParent();
6847 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6848 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6849 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6850
6851 F->insert(It, FalseBB);
6852 F->insert(It, TrueBB);
6853 F->insert(It, SinkBB);
6854
6855   // Transfer the remainder of BB and its successor edges to SinkBB.
6856 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6858
6859   // Insert the real instruction into BB.
6860 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6861 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6862
6863 // Insert branch.
6864 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6865 BB->addSuccessor(FalseBB);
6866 BB->addSuccessor(TrueBB);
6867
6868 // FalseBB.
6869 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6870 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6871 .addReg(LoongArch::R0)
6872 .addImm(0);
6873 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6874 FalseBB->addSuccessor(SinkBB);
6875
6876 // TrueBB.
6877 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6878 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6879 .addReg(LoongArch::R0)
6880 .addImm(1);
6881 TrueBB->addSuccessor(SinkBB);
6882
6883 // SinkBB: merge the results.
6884 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6885 MI.getOperand(0).getReg())
6886 .addReg(RD1)
6887 .addMBB(FalseBB)
6888 .addReg(RD2)
6889 .addMBB(TrueBB);
6890
6891 // The pseudo instruction is gone now.
6892 MI.eraseFromParent();
6893 return SinkBB;
6894}
6895
6896static MachineBasicBlock *
6898 const LoongArchSubtarget &Subtarget) {
6899 unsigned InsOp;
6900 unsigned BroadcastOp;
6901 unsigned HalfSize;
6902 switch (MI.getOpcode()) {
6903 default:
6904 llvm_unreachable("Unexpected opcode");
6905 case LoongArch::PseudoXVINSGR2VR_B:
6906 HalfSize = 16;
6907 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6908 InsOp = LoongArch::XVEXTRINS_B;
6909 break;
6910 case LoongArch::PseudoXVINSGR2VR_H:
6911 HalfSize = 8;
6912 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6913 InsOp = LoongArch::XVEXTRINS_H;
6914 break;
6915 }
6916 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6917 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6918 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6919 DebugLoc DL = MI.getDebugLoc();
6921 // XDst = vector_insert XSrc, Elt, Idx
6922 Register XDst = MI.getOperand(0).getReg();
6923 Register XSrc = MI.getOperand(1).getReg();
6924 Register Elt = MI.getOperand(2).getReg();
6925 unsigned Idx = MI.getOperand(3).getImm();
6926
6927 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6928 Idx < HalfSize) {
6929 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6930 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6931
6932 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6933 .addReg(XSrc, 0, LoongArch::sub_128);
6934 BuildMI(*BB, MI, DL,
6935 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6936 : LoongArch::VINSGR2VR_B),
6937 ScratchSubReg2)
6938 .addReg(ScratchSubReg1)
6939 .addReg(Elt)
6940 .addImm(Idx);
6941
6942 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6943 .addImm(0)
6944 .addReg(ScratchSubReg2)
6945 .addImm(LoongArch::sub_128);
6946 } else {
6947 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6948 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6949
6950 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6951
6952 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6953 .addReg(ScratchReg1)
6954 .addReg(XSrc)
6955 .addImm(Idx >= HalfSize ? 48 : 18);
6956
6957 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6958 .addReg(XSrc)
6959 .addReg(ScratchReg2)
6960 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6961 }
6962
6963 MI.eraseFromParent();
6964 return BB;
6965}
6966
6969 const LoongArchSubtarget &Subtarget) {
6970 assert(Subtarget.hasExtLSX());
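  // Expand the scalar CTPOP pseudo using LSX: build a zero vector with VLDI,
  // insert the GPR operand into element 0, run VPCNT on that element, and
  // move the population count back to a GPR with VPICKVE2GR.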
6971 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6972 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6973 DebugLoc DL = MI.getDebugLoc();
6975 Register Dst = MI.getOperand(0).getReg();
6976 Register Src = MI.getOperand(1).getReg();
6977 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6978 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6979 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6980
6981 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6982 BuildMI(*BB, MI, DL,
6983 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6984 : LoongArch::VINSGR2VR_W),
6985 ScratchReg2)
6986 .addReg(ScratchReg1)
6987 .addReg(Src)
6988 .addImm(0);
6989 BuildMI(
6990 *BB, MI, DL,
6991 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6992 ScratchReg3)
6993 .addReg(ScratchReg2);
6994 BuildMI(*BB, MI, DL,
6995 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6996 : LoongArch::VPICKVE2GR_W),
6997 Dst)
6998 .addReg(ScratchReg3)
6999 .addImm(0);
7000
7001 MI.eraseFromParent();
7002 return BB;
7003}
7004
7005static MachineBasicBlock *
7007 const LoongArchSubtarget &Subtarget) {
7008 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7009 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7010 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7012 Register Dst = MI.getOperand(0).getReg();
7013 Register Src = MI.getOperand(1).getReg();
7014 DebugLoc DL = MI.getDebugLoc();
7015 unsigned EleBits = 8;
7016 unsigned NotOpc = 0;
7017 unsigned MskOpc;
7018
7019 switch (MI.getOpcode()) {
7020 default:
7021 llvm_unreachable("Unexpected opcode");
7022 case LoongArch::PseudoVMSKLTZ_B:
7023 MskOpc = LoongArch::VMSKLTZ_B;
7024 break;
7025 case LoongArch::PseudoVMSKLTZ_H:
7026 MskOpc = LoongArch::VMSKLTZ_H;
7027 EleBits = 16;
7028 break;
7029 case LoongArch::PseudoVMSKLTZ_W:
7030 MskOpc = LoongArch::VMSKLTZ_W;
7031 EleBits = 32;
7032 break;
7033 case LoongArch::PseudoVMSKLTZ_D:
7034 MskOpc = LoongArch::VMSKLTZ_D;
7035 EleBits = 64;
7036 break;
7037 case LoongArch::PseudoVMSKGEZ_B:
7038 MskOpc = LoongArch::VMSKGEZ_B;
7039 break;
7040 case LoongArch::PseudoVMSKEQZ_B:
7041 MskOpc = LoongArch::VMSKNZ_B;
7042 NotOpc = LoongArch::VNOR_V;
7043 break;
7044 case LoongArch::PseudoVMSKNEZ_B:
7045 MskOpc = LoongArch::VMSKNZ_B;
7046 break;
7047 case LoongArch::PseudoXVMSKLTZ_B:
7048 MskOpc = LoongArch::XVMSKLTZ_B;
7049 RC = &LoongArch::LASX256RegClass;
7050 break;
7051 case LoongArch::PseudoXVMSKLTZ_H:
7052 MskOpc = LoongArch::XVMSKLTZ_H;
7053 RC = &LoongArch::LASX256RegClass;
7054 EleBits = 16;
7055 break;
7056 case LoongArch::PseudoXVMSKLTZ_W:
7057 MskOpc = LoongArch::XVMSKLTZ_W;
7058 RC = &LoongArch::LASX256RegClass;
7059 EleBits = 32;
7060 break;
7061 case LoongArch::PseudoXVMSKLTZ_D:
7062 MskOpc = LoongArch::XVMSKLTZ_D;
7063 RC = &LoongArch::LASX256RegClass;
7064 EleBits = 64;
7065 break;
7066 case LoongArch::PseudoXVMSKGEZ_B:
7067 MskOpc = LoongArch::XVMSKGEZ_B;
7068 RC = &LoongArch::LASX256RegClass;
7069 break;
7070 case LoongArch::PseudoXVMSKEQZ_B:
7071 MskOpc = LoongArch::XVMSKNZ_B;
7072 NotOpc = LoongArch::XVNOR_V;
7073 RC = &LoongArch::LASX256RegClass;
7074 break;
7075 case LoongArch::PseudoXVMSKNEZ_B:
7076 MskOpc = LoongArch::XVMSKNZ_B;
7077 RC = &LoongArch::LASX256RegClass;
7078 break;
7079 }
7080
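  // Build the mask with MskOpc (inverted via a vector NOR for the *MSKEQZ
  // variants), then move it to a GPR: a single VPICKVE2GR_HU suffices for
  // 128-bit vectors, while 256-bit vectors read the two 32-bit halves with
  // XVPICKVE2GR_WU and merge them with BSTRINS.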
7081 Register Msk = MRI.createVirtualRegister(RC);
7082 if (NotOpc) {
7083 Register Tmp = MRI.createVirtualRegister(RC);
7084 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7085 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7086 .addReg(Tmp, RegState::Kill)
7087 .addReg(Tmp, RegState::Kill);
7088 } else {
7089 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7090 }
7091
7092 if (TRI->getRegSizeInBits(*RC) > 128) {
7093 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7094 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7095 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7096 .addReg(Msk)
7097 .addImm(0);
7098 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7099 .addReg(Msk, RegState::Kill)
7100 .addImm(4);
7101 BuildMI(*BB, MI, DL,
7102 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7103 : LoongArch::BSTRINS_W),
7104 Dst)
7107 .addImm(256 / EleBits - 1)
7108 .addImm(128 / EleBits);
7109 } else {
7110 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7111 .addReg(Msk, RegState::Kill)
7112 .addImm(0);
7113 }
7114
7115 MI.eraseFromParent();
7116 return BB;
7117}
7118
7119static MachineBasicBlock *
7121 const LoongArchSubtarget &Subtarget) {
7122 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7123 "Unexpected instruction");
7124
7125 MachineFunction &MF = *BB->getParent();
7126 DebugLoc DL = MI.getDebugLoc();
7128 Register LoReg = MI.getOperand(0).getReg();
7129 Register HiReg = MI.getOperand(1).getReg();
7130 Register SrcReg = MI.getOperand(2).getReg();
7131
7132 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7133 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7134 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7135 MI.eraseFromParent(); // The pseudo instruction is gone now.
7136 return BB;
7137}
7138
7139static MachineBasicBlock *
7141 const LoongArchSubtarget &Subtarget) {
7142 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7143 "Unexpected instruction");
7144
7145 MachineFunction &MF = *BB->getParent();
7146 DebugLoc DL = MI.getDebugLoc();
7149 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7150 Register DstReg = MI.getOperand(0).getReg();
7151 Register LoReg = MI.getOperand(1).getReg();
7152 Register HiReg = MI.getOperand(2).getReg();
7153
7154 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7155 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7156 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7157 .addReg(TmpReg, RegState::Kill)
7158 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7159 MI.eraseFromParent(); // The pseudo instruction is gone now.
7160 return BB;
7161}
7162
7164 switch (MI.getOpcode()) {
7165 default:
7166 return false;
7167 case LoongArch::Select_GPR_Using_CC_GPR:
7168 return true;
7169 }
7170}
7171
7172static MachineBasicBlock *
7174 const LoongArchSubtarget &Subtarget) {
7175 // To "insert" Select_* instructions, we actually have to insert the triangle
7176 // control-flow pattern. The incoming instructions know the destination vreg
7177 // to set, the condition code register to branch on, the true/false values to
7178 // select between, and the condcode to use to select the appropriate branch.
7179 //
7180 // We produce the following control flow:
7181 // HeadMBB
7182 // | \
7183 // | IfFalseMBB
7184 // | /
7185 // TailMBB
7186 //
7187 // When we find a sequence of selects we attempt to optimize their emission
7188 // by sharing the control flow. Currently we only handle cases where we have
7189 // multiple selects with the exact same condition (same LHS, RHS and CC).
7190 // The selects may be interleaved with other instructions if the other
7191 // instructions meet some requirements we deem safe:
7192 // - They are not pseudo instructions.
7193 // - They are debug instructions; otherwise,
7194 // - They do not have side-effects, do not access memory and their inputs do
7195 // not depend on the results of the select pseudo-instructions.
7196 // The TrueV/FalseV operands of the selects cannot depend on the result of
7197 // previous selects in the sequence.
7198 // These conditions could be further relaxed. See the X86 target for a
7199 // related approach and more information.
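  // For example (operand order as used below: dst, lhs, rhs, cc, true, false),
  // a pair of selects with the same condition:
  //   %r0 = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t0, %f0
  //   %r1 = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t1, %f1
  // shares one conditional branch and becomes two PHIs in TailMBB:
  //   %r0 = phi [ %t0, HeadMBB ], [ %f0, IfFalseMBB ]
  //   %r1 = phi [ %t1, HeadMBB ], [ %f1, IfFalseMBB ]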
7200
7201 Register LHS = MI.getOperand(1).getReg();
7202 Register RHS;
7203 if (MI.getOperand(2).isReg())
7204 RHS = MI.getOperand(2).getReg();
7205 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7206
7207 SmallVector<MachineInstr *, 4> SelectDebugValues;
7208 SmallSet<Register, 4> SelectDests;
7209 SelectDests.insert(MI.getOperand(0).getReg());
7210
7211 MachineInstr *LastSelectPseudo = &MI;
7212 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7213 SequenceMBBI != E; ++SequenceMBBI) {
7214 if (SequenceMBBI->isDebugInstr())
7215 continue;
7216 if (isSelectPseudo(*SequenceMBBI)) {
7217 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7218 !SequenceMBBI->getOperand(2).isReg() ||
7219 SequenceMBBI->getOperand(2).getReg() != RHS ||
7220 SequenceMBBI->getOperand(3).getImm() != CC ||
7221 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7222 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7223 break;
7224 LastSelectPseudo = &*SequenceMBBI;
7225 SequenceMBBI->collectDebugValues(SelectDebugValues);
7226 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7227 continue;
7228 }
7229 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7230 SequenceMBBI->mayLoadOrStore() ||
7231 SequenceMBBI->usesCustomInsertionHook())
7232 break;
7233 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7234 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7235 }))
7236 break;
7237 }
7238
7239 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7240 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7241 DebugLoc DL = MI.getDebugLoc();
7243
7244 MachineBasicBlock *HeadMBB = BB;
7245 MachineFunction *F = BB->getParent();
7246 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7247 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7248
7249 F->insert(I, IfFalseMBB);
7250 F->insert(I, TailMBB);
7251
7252 // Set the call frame size on entry to the new basic blocks.
7253 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7254 IfFalseMBB->setCallFrameSize(CallFrameSize);
7255 TailMBB->setCallFrameSize(CallFrameSize);
7256
7257 // Transfer debug instructions associated with the selects to TailMBB.
7258 for (MachineInstr *DebugInstr : SelectDebugValues) {
7259 TailMBB->push_back(DebugInstr->removeFromParent());
7260 }
7261
7262 // Move all instructions after the sequence to TailMBB.
7263 TailMBB->splice(TailMBB->end(), HeadMBB,
7264 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7265 // Update machine-CFG edges by transferring all successors of the current
7266 // block to the new block which will contain the Phi nodes for the selects.
7267 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7268 // Set the successors for HeadMBB.
7269 HeadMBB->addSuccessor(IfFalseMBB);
7270 HeadMBB->addSuccessor(TailMBB);
7271
7272 // Insert appropriate branch.
7273 if (MI.getOperand(2).isImm())
7274 BuildMI(HeadMBB, DL, TII.get(CC))
7275 .addReg(LHS)
7276 .addImm(MI.getOperand(2).getImm())
7277 .addMBB(TailMBB);
7278 else
7279 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7280
7281 // IfFalseMBB just falls through to TailMBB.
7282 IfFalseMBB->addSuccessor(TailMBB);
7283
7284 // Create PHIs for all of the select pseudo-instructions.
7285 auto SelectMBBI = MI.getIterator();
7286 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7287 auto InsertionPoint = TailMBB->begin();
7288 while (SelectMBBI != SelectEnd) {
7289 auto Next = std::next(SelectMBBI);
7290 if (isSelectPseudo(*SelectMBBI)) {
7291 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7292 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7293 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7294 .addReg(SelectMBBI->getOperand(4).getReg())
7295 .addMBB(HeadMBB)
7296 .addReg(SelectMBBI->getOperand(5).getReg())
7297 .addMBB(IfFalseMBB);
7298 SelectMBBI->eraseFromParent();
7299 }
7300 SelectMBBI = Next;
7301 }
7302
7303 F->getProperties().resetNoPHIs();
7304 return TailMBB;
7305}
7306
7307MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7308 MachineInstr &MI, MachineBasicBlock *BB) const {
7309 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7310 DebugLoc DL = MI.getDebugLoc();
7311
7312 switch (MI.getOpcode()) {
7313 default:
7314 llvm_unreachable("Unexpected instr type to insert");
7315 case LoongArch::DIV_W:
7316 case LoongArch::DIV_WU:
7317 case LoongArch::MOD_W:
7318 case LoongArch::MOD_WU:
7319 case LoongArch::DIV_D:
7320 case LoongArch::DIV_DU:
7321 case LoongArch::MOD_D:
7322 case LoongArch::MOD_DU:
7323 return insertDivByZeroTrap(MI, BB);
7324 break;
7325 case LoongArch::WRFCSR: {
7326 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7327 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7328 .addReg(MI.getOperand(1).getReg());
7329 MI.eraseFromParent();
7330 return BB;
7331 }
7332 case LoongArch::RDFCSR: {
7333 MachineInstr *ReadFCSR =
7334 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7335 MI.getOperand(0).getReg())
7336 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7337 ReadFCSR->getOperand(1).setIsUndef();
7338 MI.eraseFromParent();
7339 return BB;
7340 }
7341 case LoongArch::Select_GPR_Using_CC_GPR:
7342 return emitSelectPseudo(MI, BB, Subtarget);
7343 case LoongArch::BuildPairF64Pseudo:
7344 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7345 case LoongArch::SplitPairF64Pseudo:
7346 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7347 case LoongArch::PseudoVBZ:
7348 case LoongArch::PseudoVBZ_B:
7349 case LoongArch::PseudoVBZ_H:
7350 case LoongArch::PseudoVBZ_W:
7351 case LoongArch::PseudoVBZ_D:
7352 case LoongArch::PseudoVBNZ:
7353 case LoongArch::PseudoVBNZ_B:
7354 case LoongArch::PseudoVBNZ_H:
7355 case LoongArch::PseudoVBNZ_W:
7356 case LoongArch::PseudoVBNZ_D:
7357 case LoongArch::PseudoXVBZ:
7358 case LoongArch::PseudoXVBZ_B:
7359 case LoongArch::PseudoXVBZ_H:
7360 case LoongArch::PseudoXVBZ_W:
7361 case LoongArch::PseudoXVBZ_D:
7362 case LoongArch::PseudoXVBNZ:
7363 case LoongArch::PseudoXVBNZ_B:
7364 case LoongArch::PseudoXVBNZ_H:
7365 case LoongArch::PseudoXVBNZ_W:
7366 case LoongArch::PseudoXVBNZ_D:
7367 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7368 case LoongArch::PseudoXVINSGR2VR_B:
7369 case LoongArch::PseudoXVINSGR2VR_H:
7370 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7371 case LoongArch::PseudoCTPOP:
7372 return emitPseudoCTPOP(MI, BB, Subtarget);
7373 case LoongArch::PseudoVMSKLTZ_B:
7374 case LoongArch::PseudoVMSKLTZ_H:
7375 case LoongArch::PseudoVMSKLTZ_W:
7376 case LoongArch::PseudoVMSKLTZ_D:
7377 case LoongArch::PseudoVMSKGEZ_B:
7378 case LoongArch::PseudoVMSKEQZ_B:
7379 case LoongArch::PseudoVMSKNEZ_B:
7380 case LoongArch::PseudoXVMSKLTZ_B:
7381 case LoongArch::PseudoXVMSKLTZ_H:
7382 case LoongArch::PseudoXVMSKLTZ_W:
7383 case LoongArch::PseudoXVMSKLTZ_D:
7384 case LoongArch::PseudoXVMSKGEZ_B:
7385 case LoongArch::PseudoXVMSKEQZ_B:
7386 case LoongArch::PseudoXVMSKNEZ_B:
7387 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7388 case TargetOpcode::STATEPOINT:
7389     // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
7390     // while the bl call instruction (to which the statepoint is eventually
7391     // lowered) has an implicit def. This def is early-clobber as it is set
7392     // at the moment of the call, before any use is read.
7393 // Add this implicit dead def here as a workaround.
7394 MI.addOperand(*MI.getMF(),
7396 LoongArch::R1, /*isDef*/ true,
7397 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7398 /*isUndef*/ false, /*isEarlyClobber*/ true));
7399 if (!Subtarget.is64Bit())
7400 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7401 return emitPatchPoint(MI, BB);
7402 }
7403}
7404
7406 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7407 unsigned *Fast) const {
7408 if (!Subtarget.hasUAL())
7409 return false;
7410
7411 // TODO: set reasonable speed number.
7412 if (Fast)
7413 *Fast = 1;
7414 return true;
7415}
7416
7417const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7418 switch ((LoongArchISD::NodeType)Opcode) {
7420 break;
7421
7422#define NODE_NAME_CASE(node) \
7423 case LoongArchISD::node: \
7424 return "LoongArchISD::" #node;
7425
7426 // TODO: Add more target-dependent nodes later.
7427 NODE_NAME_CASE(CALL)
7428 NODE_NAME_CASE(CALL_MEDIUM)
7429 NODE_NAME_CASE(CALL_LARGE)
7430 NODE_NAME_CASE(RET)
7431 NODE_NAME_CASE(TAIL)
7432 NODE_NAME_CASE(TAIL_MEDIUM)
7433 NODE_NAME_CASE(TAIL_LARGE)
7434 NODE_NAME_CASE(SELECT_CC)
7435 NODE_NAME_CASE(BR_CC)
7436 NODE_NAME_CASE(BRCOND)
7437 NODE_NAME_CASE(SLL_W)
7438 NODE_NAME_CASE(SRA_W)
7439 NODE_NAME_CASE(SRL_W)
7440 NODE_NAME_CASE(BSTRINS)
7441 NODE_NAME_CASE(BSTRPICK)
7442 NODE_NAME_CASE(MOVGR2FR_W)
7443 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7444 NODE_NAME_CASE(MOVGR2FR_D)
7445 NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
7446 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7447 NODE_NAME_CASE(FTINT)
7448 NODE_NAME_CASE(BUILD_PAIR_F64)
7449 NODE_NAME_CASE(SPLIT_PAIR_F64)
7450 NODE_NAME_CASE(REVB_2H)
7451 NODE_NAME_CASE(REVB_2W)
7452 NODE_NAME_CASE(BITREV_4B)
7453 NODE_NAME_CASE(BITREV_8B)
7454 NODE_NAME_CASE(BITREV_W)
7455 NODE_NAME_CASE(ROTR_W)
7456 NODE_NAME_CASE(ROTL_W)
7457 NODE_NAME_CASE(DIV_W)
7458 NODE_NAME_CASE(DIV_WU)
7459 NODE_NAME_CASE(MOD_W)
7460 NODE_NAME_CASE(MOD_WU)
7461 NODE_NAME_CASE(CLZ_W)
7462 NODE_NAME_CASE(CTZ_W)
7463 NODE_NAME_CASE(DBAR)
7464 NODE_NAME_CASE(IBAR)
7465 NODE_NAME_CASE(BREAK)
7466 NODE_NAME_CASE(SYSCALL)
7467 NODE_NAME_CASE(CRC_W_B_W)
7468 NODE_NAME_CASE(CRC_W_H_W)
7469 NODE_NAME_CASE(CRC_W_W_W)
7470 NODE_NAME_CASE(CRC_W_D_W)
7471 NODE_NAME_CASE(CRCC_W_B_W)
7472 NODE_NAME_CASE(CRCC_W_H_W)
7473 NODE_NAME_CASE(CRCC_W_W_W)
7474 NODE_NAME_CASE(CRCC_W_D_W)
7475 NODE_NAME_CASE(CSRRD)
7476 NODE_NAME_CASE(CSRWR)
7477 NODE_NAME_CASE(CSRXCHG)
7478 NODE_NAME_CASE(IOCSRRD_B)
7479 NODE_NAME_CASE(IOCSRRD_H)
7480 NODE_NAME_CASE(IOCSRRD_W)
7481 NODE_NAME_CASE(IOCSRRD_D)
7482 NODE_NAME_CASE(IOCSRWR_B)
7483 NODE_NAME_CASE(IOCSRWR_H)
7484 NODE_NAME_CASE(IOCSRWR_W)
7485 NODE_NAME_CASE(IOCSRWR_D)
7486 NODE_NAME_CASE(CPUCFG)
7487 NODE_NAME_CASE(MOVGR2FCSR)
7488 NODE_NAME_CASE(MOVFCSR2GR)
7489 NODE_NAME_CASE(CACOP_D)
7490 NODE_NAME_CASE(CACOP_W)
7491 NODE_NAME_CASE(VSHUF)
7492 NODE_NAME_CASE(VPICKEV)
7493 NODE_NAME_CASE(VPICKOD)
7494 NODE_NAME_CASE(VPACKEV)
7495 NODE_NAME_CASE(VPACKOD)
7496 NODE_NAME_CASE(VILVL)
7497 NODE_NAME_CASE(VILVH)
7498 NODE_NAME_CASE(VSHUF4I)
7499 NODE_NAME_CASE(VREPLVEI)
7500 NODE_NAME_CASE(VREPLGR2VR)
7501 NODE_NAME_CASE(XVPERMI)
7502 NODE_NAME_CASE(XVPERM)
7503 NODE_NAME_CASE(XVREPLVE0)
7504 NODE_NAME_CASE(XVREPLVE0Q)
7505 NODE_NAME_CASE(XVINSVE0)
7506 NODE_NAME_CASE(VPICK_SEXT_ELT)
7507 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7508 NODE_NAME_CASE(VREPLVE)
7509 NODE_NAME_CASE(VALL_ZERO)
7510 NODE_NAME_CASE(VANY_ZERO)
7511 NODE_NAME_CASE(VALL_NONZERO)
7512 NODE_NAME_CASE(VANY_NONZERO)
7513 NODE_NAME_CASE(FRECIPE)
7514 NODE_NAME_CASE(FRSQRTE)
7515 NODE_NAME_CASE(VSLLI)
7516 NODE_NAME_CASE(VSRLI)
7517 NODE_NAME_CASE(VBSLL)
7518 NODE_NAME_CASE(VBSRL)
7519 NODE_NAME_CASE(VLDREPL)
7520 NODE_NAME_CASE(VMSKLTZ)
7521 NODE_NAME_CASE(VMSKGEZ)
7522 NODE_NAME_CASE(VMSKEQZ)
7523 NODE_NAME_CASE(VMSKNEZ)
7524 NODE_NAME_CASE(XVMSKLTZ)
7525 NODE_NAME_CASE(XVMSKGEZ)
7526 NODE_NAME_CASE(XVMSKEQZ)
7527 NODE_NAME_CASE(XVMSKNEZ)
7528 NODE_NAME_CASE(VHADDW)
7529 }
7530#undef NODE_NAME_CASE
7531 return nullptr;
7532}
7533
7534//===----------------------------------------------------------------------===//
7535// Calling Convention Implementation
7536//===----------------------------------------------------------------------===//
7537
7538 // Eight general-purpose registers a0-a7 are used for passing integer
7539 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
7540 // to pass fixed-point arguments, and floating-point arguments when no FPR is
7541 // available or with the soft-float ABI.
7542const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7543 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7544 LoongArch::R10, LoongArch::R11};
7545 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7546 // arguments, and fa0-fa1 are also used to return values.
7547const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7548 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7549 LoongArch::F6, LoongArch::F7};
7550// FPR32 and FPR64 alias each other.
7552 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7553 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7554
7555const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7556 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7557 LoongArch::VR6, LoongArch::VR7};
7558
7559const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7560 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7561 LoongArch::XR6, LoongArch::XR7};
7562
7563// Pass a 2*GRLen argument that has been split into two GRLen values through
7564// registers or the stack as necessary.
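// For example, on LA32 an i64 argument is split into two i32 halves. If only
// one argument GPR remains, the first half is passed in that register and the
// second half on the stack; if no GPR remains, both halves go to the stack
// using the larger of the GRLen alignment and the original alignment.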
7565static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7566 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7567 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7568 ISD::ArgFlagsTy ArgFlags2) {
7569 unsigned GRLenInBytes = GRLen / 8;
7570 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7571 // At least one half can be passed via register.
7572 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7573 VA1.getLocVT(), CCValAssign::Full));
7574 } else {
7575 // Both halves must be passed on the stack, with proper alignment.
7576 Align StackAlign =
7577 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7578 State.addLoc(
7580 State.AllocateStack(GRLenInBytes, StackAlign),
7581 VA1.getLocVT(), CCValAssign::Full));
7582 State.addLoc(CCValAssign::getMem(
7583 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7584 LocVT2, CCValAssign::Full));
7585 return false;
7586 }
7587 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7588 // The second half can also be passed via register.
7589 State.addLoc(
7590 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7591 } else {
7592 // The second half is passed via the stack, without additional alignment.
7593 State.addLoc(CCValAssign::getMem(
7594 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7595 LocVT2, CCValAssign::Full));
7596 }
7597 return false;
7598}
7599
7600// Implements the LoongArch calling convention. Returns true upon failure.
7602 unsigned ValNo, MVT ValVT,
7603 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7604 CCState &State, bool IsRet, Type *OrigTy) {
7605 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7606   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7607 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7608 MVT LocVT = ValVT;
7609
7610 // Any return value split into more than two values can't be returned
7611 // directly.
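  // For example, an i128 return value on LA32 would be split into four i32
  // parts, so it is returned indirectly instead.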
7612 if (IsRet && ValNo > 1)
7613 return true;
7614
7615   // Pass floating-point values via GPRs if this is a variadic argument or if
7616   // no FPR is available.
7616 bool UseGPRForFloat = true;
7617
7618 switch (ABI) {
7619 default:
7620 llvm_unreachable("Unexpected ABI");
7621 break;
7626 UseGPRForFloat = ArgFlags.isVarArg();
7627 break;
7630 break;
7631 }
7632
7633 // If this is a variadic argument, the LoongArch calling convention requires
7634 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7635 // byte alignment. An aligned register should be used regardless of whether
7636 // the original argument was split during legalisation or not. The argument
7637 // will not be passed by registers if the original type is larger than
7638 // 2*GRLen, so the register alignment rule does not apply.
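  // For example, on LA64 a variadic argument with 16-byte size and alignment
  // (split into two i64 halves) must start in an even-indexed argument GPR; if
  // the next free register would be a1, a3, a5 or a7, that register is skipped
  // and left unused.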
7639 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7640 if (ArgFlags.isVarArg() &&
7641 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7642 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7643 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7644 // Skip 'odd' register if necessary.
7645 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7646 State.AllocateReg(ArgGPRs);
7647 }
7648
7649 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7650 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7651 State.getPendingArgFlags();
7652
7653 assert(PendingLocs.size() == PendingArgFlags.size() &&
7654 "PendingLocs and PendingArgFlags out of sync");
7655
7656 // FPR32 and FPR64 alias each other.
7657 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7658 UseGPRForFloat = true;
7659
7660 if (UseGPRForFloat && ValVT == MVT::f32) {
7661 LocVT = GRLenVT;
7662 LocInfo = CCValAssign::BCvt;
7663 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7664 LocVT = MVT::i64;
7665 LocInfo = CCValAssign::BCvt;
7666 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7667 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7668 // registers are exhausted.
7669 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7670     // Depending on available argument GPRs, f64 may be passed in a pair of
7671 // GPRs, split between a GPR and the stack, or passed completely on the
7672 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7673 // cases.
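    // For example, with a0 and a1 free the value is passed in the a0/a1 pair;
    // with only a7 free the low word goes in a7 and the high word in a 4-byte
    // stack slot; with no GPR free the whole value takes an 8-byte-aligned
    // 8-byte stack slot.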
7674 MCRegister Reg = State.AllocateReg(ArgGPRs);
7675 if (!Reg) {
7676 int64_t StackOffset = State.AllocateStack(8, Align(8));
7677 State.addLoc(
7678 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7679 return false;
7680 }
7681 LocVT = MVT::i32;
7682 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7683 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7684 if (HiReg) {
7685 State.addLoc(
7686 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7687 } else {
7688 int64_t StackOffset = State.AllocateStack(4, Align(4));
7689 State.addLoc(
7690 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7691 }
7692 return false;
7693 }
7694
7695 // Split arguments might be passed indirectly, so keep track of the pending
7696 // values.
7697 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7698 LocVT = GRLenVT;
7699 LocInfo = CCValAssign::Indirect;
7700 PendingLocs.push_back(
7701 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7702 PendingArgFlags.push_back(ArgFlags);
7703 if (!ArgFlags.isSplitEnd()) {
7704 return false;
7705 }
7706 }
7707
7708 // If the split argument only had two elements, it should be passed directly
7709 // in registers or on the stack.
7710 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7711 PendingLocs.size() <= 2) {
7712 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7713 // Apply the normal calling convention rules to the first half of the
7714 // split argument.
7715 CCValAssign VA = PendingLocs[0];
7716 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7717 PendingLocs.clear();
7718 PendingArgFlags.clear();
7719 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7720 ArgFlags);
7721 }
7722
7723 // Allocate to a register if possible, or else a stack slot.
7724 Register Reg;
7725 unsigned StoreSizeBytes = GRLen / 8;
7726 Align StackAlign = Align(GRLen / 8);
7727
7728 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7729 Reg = State.AllocateReg(ArgFPR32s);
7730 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7731 Reg = State.AllocateReg(ArgFPR64s);
7732 } else if (ValVT.is128BitVector()) {
7733 Reg = State.AllocateReg(ArgVRs);
7734 UseGPRForFloat = false;
7735 StoreSizeBytes = 16;
7736 StackAlign = Align(16);
7737 } else if (ValVT.is256BitVector()) {
7738 Reg = State.AllocateReg(ArgXRs);
7739 UseGPRForFloat = false;
7740 StoreSizeBytes = 32;
7741 StackAlign = Align(32);
7742 } else {
7743 Reg = State.AllocateReg(ArgGPRs);
7744 }
7745
7746 unsigned StackOffset =
7747 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7748
7749 // If we reach this point and PendingLocs is non-empty, we must be at the
7750 // end of a split argument that must be passed indirectly.
7751 if (!PendingLocs.empty()) {
7752 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7753 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7754 for (auto &It : PendingLocs) {
7755 if (Reg)
7756 It.convertToReg(Reg);
7757 else
7758 It.convertToMem(StackOffset);
7759 State.addLoc(It);
7760 }
7761 PendingLocs.clear();
7762 PendingArgFlags.clear();
7763 return false;
7764 }
7765 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7766 "Expected an GRLenVT at this stage");
7767
7768 if (Reg) {
7769 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7770 return false;
7771 }
7772
7773 // When a floating-point value is passed on the stack, no bit-cast is needed.
7774 if (ValVT.isFloatingPoint()) {
7775 LocVT = ValVT;
7776 LocInfo = CCValAssign::Full;
7777 }
7778
7779 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7780 return false;
7781}
7782
7783void LoongArchTargetLowering::analyzeInputArgs(
7784 MachineFunction &MF, CCState &CCInfo,
7785 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7786 LoongArchCCAssignFn Fn) const {
7787 FunctionType *FType = MF.getFunction().getFunctionType();
7788 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7789 MVT ArgVT = Ins[i].VT;
7790 Type *ArgTy = nullptr;
7791 if (IsRet)
7792 ArgTy = FType->getReturnType();
7793 else if (Ins[i].isOrigArg())
7794 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7796 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7797 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7798 CCInfo, IsRet, ArgTy)) {
7799 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7800 << '\n');
7801 llvm_unreachable("");
7802 }
7803 }
7804}
7805
7806void LoongArchTargetLowering::analyzeOutputArgs(
7807 MachineFunction &MF, CCState &CCInfo,
7808 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7809 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7810 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7811 MVT ArgVT = Outs[i].VT;
7812 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7814 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7815 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7816 CCInfo, IsRet, OrigTy)) {
7817 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7818 << "\n");
7819 llvm_unreachable("");
7820 }
7821 }
7822}
7823
7824// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7825// values.
7827 const CCValAssign &VA, const SDLoc &DL) {
7828 switch (VA.getLocInfo()) {
7829 default:
7830 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7831 case CCValAssign::Full:
7833 break;
7834 case CCValAssign::BCvt:
7835 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7836 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7837 else
7838 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7839 break;
7840 }
7841 return Val;
7842}
7843
7845 const CCValAssign &VA, const SDLoc &DL,
7846 const ISD::InputArg &In,
7847 const LoongArchTargetLowering &TLI) {
7850 EVT LocVT = VA.getLocVT();
7851 SDValue Val;
7852 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7853 Register VReg = RegInfo.createVirtualRegister(RC);
7854 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7855 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7856
7857 // If input is sign extended from 32 bits, note it for the OptW pass.
7858 if (In.isOrigArg()) {
7859 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7860 if (OrigArg->getType()->isIntegerTy()) {
7861 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7862 // An input zero extended from i31 can also be considered sign extended.
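      // (Its upper bits, including bit 31, are known to be zero, so the
      // 32-bit value is non-negative and sign-extending it is a no-op.)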
7863 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7864 (BitWidth < 32 && In.Flags.isZExt())) {
7867 LAFI->addSExt32Register(VReg);
7868 }
7869 }
7870 }
7871
7872 return convertLocVTToValVT(DAG, Val, VA, DL);
7873}
7874
7875// The caller is responsible for loading the full value if the argument is
7876// passed with CCValAssign::Indirect.
7878 const CCValAssign &VA, const SDLoc &DL) {
7880 MachineFrameInfo &MFI = MF.getFrameInfo();
7881 EVT ValVT = VA.getValVT();
7882 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7883 /*IsImmutable=*/true);
7884 SDValue FIN = DAG.getFrameIndex(
7886
7887 ISD::LoadExtType ExtType;
7888 switch (VA.getLocInfo()) {
7889 default:
7890 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7891 case CCValAssign::Full:
7893 case CCValAssign::BCvt:
7894 ExtType = ISD::NON_EXTLOAD;
7895 break;
7896 }
7897 return DAG.getExtLoad(
7898 ExtType, DL, VA.getLocVT(), Chain, FIN,
7900}
7901
7903 const CCValAssign &VA,
7904 const CCValAssign &HiVA,
7905 const SDLoc &DL) {
7906 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7907 "Unexpected VA");
7909 MachineFrameInfo &MFI = MF.getFrameInfo();
7911
7912 assert(VA.isRegLoc() && "Expected register VA assignment");
7913
7914 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7915 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7916 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7917 SDValue Hi;
7918 if (HiVA.isMemLoc()) {
7919 // Second half of f64 is passed on the stack.
7920 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7921 /*IsImmutable=*/true);
7922 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7923 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7925 } else {
7926 // Second half of f64 is passed in another GPR.
7927 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7928 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7929 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7930 }
7931 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7932}
7933
7935 const CCValAssign &VA, const SDLoc &DL) {
7936 EVT LocVT = VA.getLocVT();
7937
7938 switch (VA.getLocInfo()) {
7939 default:
7940 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7941 case CCValAssign::Full:
7942 break;
7943 case CCValAssign::BCvt:
7944 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7945 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7946 else
7947 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7948 break;
7949 }
7950 return Val;
7951}
7952
7953static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7954 CCValAssign::LocInfo LocInfo,
7955 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7956 CCState &State) {
7957 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7958 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7959 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7960 static const MCPhysReg GPRList[] = {
7961 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7962 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7963 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7964 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7965 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7966 return false;
7967 }
7968 }
7969
7970 if (LocVT == MVT::f32) {
7971 // Pass in STG registers: F1, F2, F3, F4
7972 // fs0,fs1,fs2,fs3
7973 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7974 LoongArch::F26, LoongArch::F27};
7975 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7976 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7977 return false;
7978 }
7979 }
7980
7981 if (LocVT == MVT::f64) {
7982 // Pass in STG registers: D1, D2, D3, D4
7983 // fs4,fs5,fs6,fs7
7984 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7985 LoongArch::F30_64, LoongArch::F31_64};
7986 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7987 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7988 return false;
7989 }
7990 }
7991
7992 report_fatal_error("No registers left in GHC calling convention");
7993 return true;
7994}
7995
7996// Transform physical registers into virtual registers.
7998 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7999 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8000 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8001
8003
8004 switch (CallConv) {
8005 default:
8006 llvm_unreachable("Unsupported calling convention");
8007 case CallingConv::C:
8008 case CallingConv::Fast:
8010 break;
8011 case CallingConv::GHC:
8012 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8013 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8015 "GHC calling convention requires the F and D extensions");
8016 }
8017
8018 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8019 MVT GRLenVT = Subtarget.getGRLenVT();
8020 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
8021   // Used with varargs to accumulate store chains.
8022 std::vector<SDValue> OutChains;
8023
8024 // Assign locations to all of the incoming arguments.
8026 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8027
8028 if (CallConv == CallingConv::GHC)
8030 else
8031 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8032
8033 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8034 CCValAssign &VA = ArgLocs[i];
8035 SDValue ArgValue;
8036 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8037 // case.
8038 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8039 assert(VA.needsCustom());
8040 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8041 } else if (VA.isRegLoc())
8042 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8043 else
8044 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8045 if (VA.getLocInfo() == CCValAssign::Indirect) {
8046 // If the original argument was split and passed by reference, we need to
8047 // load all parts of it here (using the same address).
8048 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8050 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8051 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8052 assert(ArgPartOffset == 0);
8053 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8054 CCValAssign &PartVA = ArgLocs[i + 1];
8055 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8056 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8057 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8058 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8060 ++i;
8061 ++InsIdx;
8062 }
8063 continue;
8064 }
8065 InVals.push_back(ArgValue);
8066 }
8067
8068 if (IsVarArg) {
8070 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8071 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8072 MachineFrameInfo &MFI = MF.getFrameInfo();
8073 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8074 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8075
8076 // Offset of the first variable argument from stack pointer, and size of
8077 // the vararg save area. For now, the varargs save area is either zero or
8078 // large enough to hold a0-a7.
8079 int VaArgOffset, VarArgsSaveSize;
8080
8081 // If all registers are allocated, then all varargs must be passed on the
8082 // stack and we don't need to save any argregs.
8083 if (ArgRegs.size() == Idx) {
8084 VaArgOffset = CCInfo.getStackSize();
8085 VarArgsSaveSize = 0;
8086 } else {
8087 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8088 VaArgOffset = -VarArgsSaveSize;
8089 }
8090
8091     // Record the frame index of the first variable argument,
8092     // which is needed by VASTART.
8093 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8094 LoongArchFI->setVarArgsFrameIndex(FI);
8095
8096 // If saving an odd number of registers, create an extra stack slot to
8097 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8098 // that offsets to even-numbered registers remain 2*GRLen-aligned.
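// Illustrative sketch (assuming LA64, GRLen == 64): if a0-a4 hold fixed
// arguments, Idx == 5 and three registers (a5-a7) remain to be saved, so
// VarArgsSaveSize == 24 and VaArgOffset == -24; since Idx is odd, an extra
// 8-byte slot at offset -32 pads the save area to 32 bytes and keeps the
// 2*GRLen (16-byte) alignment.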
8099 if (Idx % 2) {
8100 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8101 true);
8102 VarArgsSaveSize += GRLenInBytes;
8103 }
8104
8105 // Copy the integer registers that may have been used for passing varargs
8106 // to the vararg save area.
8107 for (unsigned I = Idx; I < ArgRegs.size();
8108 ++I, VaArgOffset += GRLenInBytes) {
8109 const Register Reg = RegInfo.createVirtualRegister(RC);
8110 RegInfo.addLiveIn(ArgRegs[I], Reg);
8111 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8112 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8113 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8114 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8116 cast<StoreSDNode>(Store.getNode())
8117 ->getMemOperand()
8118 ->setValue((Value *)nullptr);
8119 OutChains.push_back(Store);
8120 }
8121 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8122 }
8123
8124 // All stores are grouped in one node to allow the matching between
8125 // the size of Ins and InVals. This only happens for vararg functions.
8126 if (!OutChains.empty()) {
8127 OutChains.push_back(Chain);
8128 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8129 }
8130
8131 return Chain;
8132}
8133
8135 return CI->isTailCall();
8136}
8137
8138 // Check that the return value is used only as the return value, as otherwise
8139 // we can't perform a tail call.
8141 SDValue &Chain) const {
8142 if (N->getNumValues() != 1)
8143 return false;
8144 if (!N->hasNUsesOfValue(1, 0))
8145 return false;
8146
8147 SDNode *Copy = *N->user_begin();
8148 if (Copy->getOpcode() != ISD::CopyToReg)
8149 return false;
8150
8151 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8152 // isn't safe to perform a tail call.
8153 if (Copy->getGluedNode())
8154 return false;
8155
8156 // The copy must be used by a LoongArchISD::RET, and nothing else.
8157 bool HasRet = false;
8158 for (SDNode *Node : Copy->users()) {
8159 if (Node->getOpcode() != LoongArchISD::RET)
8160 return false;
8161 HasRet = true;
8162 }
8163
8164 if (!HasRet)
8165 return false;
8166
8167 Chain = Copy->getOperand(0);
8168 return true;
8169}
8170
8171// Check whether the call is eligible for tail call optimization.
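// For illustration (not an exhaustive restatement of the checks below): a call
// such as `return callee(a, b);` where every argument fits in registers,
// nothing is passed indirectly or byval, and neither caller nor callee uses
// struct return can be tail-called; a call whose arguments spill to the stack
// cannot.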
8172bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8173 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8174 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8175
8176 auto CalleeCC = CLI.CallConv;
8177 auto &Outs = CLI.Outs;
8178 auto &Caller = MF.getFunction();
8179 auto CallerCC = Caller.getCallingConv();
8180
8181 // Do not tail call opt if the stack is used to pass parameters.
8182 if (CCInfo.getStackSize() != 0)
8183 return false;
8184
8185 // Do not tail call opt if any parameters need to be passed indirectly.
8186 for (auto &VA : ArgLocs)
8187 if (VA.getLocInfo() == CCValAssign::Indirect)
8188 return false;
8189
8190 // Do not tail call opt if either caller or callee uses struct return
8191 // semantics.
8192 auto IsCallerStructRet = Caller.hasStructRetAttr();
8193 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8194 if (IsCallerStructRet || IsCalleeStructRet)
8195 return false;
8196
8197 // Do not tail call opt if either the callee or caller has a byval argument.
8198 for (auto &Arg : Outs)
8199 if (Arg.Flags.isByVal())
8200 return false;
8201
8202 // The callee has to preserve all registers the caller needs to preserve.
8203 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8204 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8205 if (CalleeCC != CallerCC) {
8206 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8207 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8208 return false;
8209 }
8210 return true;
8211}
8212
8214 return DAG.getDataLayout().getPrefTypeAlign(
8215 VT.getTypeForEVT(*DAG.getContext()));
8216}
8217
8218// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8219// and output parameter nodes.
8220SDValue
8222 SmallVectorImpl<SDValue> &InVals) const {
8223 SelectionDAG &DAG = CLI.DAG;
8224 SDLoc &DL = CLI.DL;
8226 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8228 SDValue Chain = CLI.Chain;
8229 SDValue Callee = CLI.Callee;
8230 CallingConv::ID CallConv = CLI.CallConv;
8231 bool IsVarArg = CLI.IsVarArg;
8232 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8233 MVT GRLenVT = Subtarget.getGRLenVT();
8234 bool &IsTailCall = CLI.IsTailCall;
8235
8237
8238 // Analyze the operands of the call, assigning locations to each operand.
8240 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8241
8242 if (CallConv == CallingConv::GHC)
8243 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8244 else
8245 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8246
8247 // Check if it's really possible to do a tail call.
8248 if (IsTailCall)
8249 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8250
8251 if (IsTailCall)
8252 ++NumTailCalls;
8253 else if (CLI.CB && CLI.CB->isMustTailCall())
8254 report_fatal_error("failed to perform tail call elimination on a call "
8255 "site marked musttail");
8256
8257 // Get a count of how many bytes are to be pushed on the stack.
8258 unsigned NumBytes = ArgCCInfo.getStackSize();
8259
8260 // Create local copies for byval args.
8261 SmallVector<SDValue> ByValArgs;
8262 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8263 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8264 if (!Flags.isByVal())
8265 continue;
8266
8267 SDValue Arg = OutVals[i];
8268 unsigned Size = Flags.getByValSize();
8269 Align Alignment = Flags.getNonZeroByValAlign();
8270
8271 int FI =
8272 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8273 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8274 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8275
8276 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8277 /*IsVolatile=*/false,
8278 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8280 ByValArgs.push_back(FIPtr);
8281 }
8282
8283 if (!IsTailCall)
8284 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8285
8286 // Copy argument values to their designated locations.
8288 SmallVector<SDValue> MemOpChains;
8289 SDValue StackPtr;
8290 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8291 ++i, ++OutIdx) {
8292 CCValAssign &VA = ArgLocs[i];
8293 SDValue ArgValue = OutVals[OutIdx];
8294 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8295
8296 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8297 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8298 assert(VA.isRegLoc() && "Expected register VA assignment");
8299 assert(VA.needsCustom());
8300 SDValue SplitF64 =
8302 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8303 SDValue Lo = SplitF64.getValue(0);
8304 SDValue Hi = SplitF64.getValue(1);
8305
8306 Register RegLo = VA.getLocReg();
8307 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8308
8309 // Get the CCValAssign for the Hi part.
8310 CCValAssign &HiVA = ArgLocs[++i];
8311
8312 if (HiVA.isMemLoc()) {
8313 // Second half of f64 is passed on the stack.
8314 if (!StackPtr.getNode())
8315 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8317 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8318 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8319 // Emit the store.
8320 MemOpChains.push_back(DAG.getStore(
8321 Chain, DL, Hi, Address,
8323 } else {
8324 // Second half of f64 is passed in another GPR.
8325 Register RegHigh = HiVA.getLocReg();
8326 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8327 }
8328 continue;
8329 }
8330
8331 // Promote the value if needed.
8332 // For now, only handle fully promoted and indirect arguments.
8333 if (VA.getLocInfo() == CCValAssign::Indirect) {
8334 // Store the argument in a stack slot and pass its address.
8335 Align StackAlign =
8336 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8337 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8338 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8339 // If the original argument was split and passed by reference, we need to
8340 // store the required parts of it here (and pass just one address).
8341 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8342 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8343 assert(ArgPartOffset == 0);
8344 // Calculate the total size to store. We don't have access to what we're
8345 // actually storing without performing the loop below and collecting the
8346 // info.
8348 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8349 SDValue PartValue = OutVals[OutIdx + 1];
8350 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8351 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8352 EVT PartVT = PartValue.getValueType();
8353
8354 StoredSize += PartVT.getStoreSize();
8355 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8356 Parts.push_back(std::make_pair(PartValue, Offset));
8357 ++i;
8358 ++OutIdx;
8359 }
8360 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8361 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8362 MemOpChains.push_back(
8363 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8365 for (const auto &Part : Parts) {
8366 SDValue PartValue = Part.first;
8367 SDValue PartOffset = Part.second;
8369 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8370 MemOpChains.push_back(
8371 DAG.getStore(Chain, DL, PartValue, Address,
8373 }
8374 ArgValue = SpillSlot;
8375 } else {
8376 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8377 }
8378
8379 // Use local copy if it is a byval arg.
8380 if (Flags.isByVal())
8381 ArgValue = ByValArgs[j++];
8382
8383 if (VA.isRegLoc()) {
8384 // Queue up the argument copies and emit them at the end.
8385 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8386 } else {
8387 assert(VA.isMemLoc() && "Argument not register or memory");
8388 assert(!IsTailCall && "Tail call not allowed if stack is used "
8389 "for passing parameters");
8390
8391 // Work out the address of the stack slot.
8392 if (!StackPtr.getNode())
8393 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8395 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8397
8398 // Emit the store.
8399 MemOpChains.push_back(
8400 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8401 }
8402 }
8403
8404 // Join the stores, which are independent of one another.
8405 if (!MemOpChains.empty())
8406 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8407
8408 SDValue Glue;
8409
8410 // Build a sequence of copy-to-reg nodes, chained and glued together.
8411 for (auto &Reg : RegsToPass) {
8412 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8413 Glue = Chain.getValue(1);
8414 }
8415
8416 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8417 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8418 // split it, and so the direct call can be matched by PseudoCALL.
8420 const GlobalValue *GV = S->getGlobal();
8421 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8424 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8425 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8426 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8429 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8430 }
8431
8432 // The first call operand is the chain and the second is the target address.
8434 Ops.push_back(Chain);
8435 Ops.push_back(Callee);
8436
8437 // Add argument registers to the end of the list so that they are
8438 // known live into the call.
8439 for (auto &Reg : RegsToPass)
8440 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8441
8442 if (!IsTailCall) {
8443 // Add a register mask operand representing the call-preserved registers.
8444 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8445 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8446 assert(Mask && "Missing call preserved mask for calling convention");
8447 Ops.push_back(DAG.getRegisterMask(Mask));
8448 }
8449
8450 // Glue the call to the argument copies, if any.
8451 if (Glue.getNode())
8452 Ops.push_back(Glue);
8453
8454 // Emit the call.
8455 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8456 unsigned Op;
8457 switch (DAG.getTarget().getCodeModel()) {
8458 default:
8459 report_fatal_error("Unsupported code model");
8460 case CodeModel::Small:
8461 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8462 break;
8463 case CodeModel::Medium:
8464 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8466 break;
8467 case CodeModel::Large:
8468 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8470 break;
8471 }
8472
8473 if (IsTailCall) {
8475 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8476 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8477 return Ret;
8478 }
8479
8480 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8481 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8482 Glue = Chain.getValue(1);
8483
8484 // Mark the end of the call, which is glued to the call itself.
8485 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8486 Glue = Chain.getValue(1);
8487
8488 // Assign locations to each value returned by this call.
8490 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8491 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8492
8493 // Copy all of the result registers out of their specified physreg.
8494 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8495 auto &VA = RVLocs[i];
8496 // Copy the value out.
8497 SDValue RetValue =
8498 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8499 // Glue the RetValue to the end of the call sequence.
8500 Chain = RetValue.getValue(1);
8501 Glue = RetValue.getValue(2);
8502
8503 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8504 assert(VA.needsCustom());
8505 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8506 MVT::i32, Glue);
8507 Chain = RetValue2.getValue(1);
8508 Glue = RetValue2.getValue(2);
8509 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8510 RetValue, RetValue2);
8511 } else
8512 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8513
8514 InVals.push_back(RetValue);
8515 }
8516
8517 return Chain;
8518}
8519
8521 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8522 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8523 const Type *RetTy) const {
8525 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8526
8527 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8528 LoongArchABI::ABI ABI =
8529 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8530 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8531 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8532 return false;
8533 }
8534 return true;
8535}
8536
8538 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8540 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8541 SelectionDAG &DAG) const {
8542 // Stores the assignment of the return value to a location.
8544
8545 // Info about the registers and stack slot.
8546 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8547 *DAG.getContext());
8548
8549 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8550 nullptr, CC_LoongArch);
8551 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8552 report_fatal_error("GHC functions return void only");
8553 SDValue Glue;
8554 SmallVector<SDValue, 4> RetOps(1, Chain);
8555
8556 // Copy the result values into the output registers.
8557 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8558 SDValue Val = OutVals[OutIdx];
8559 CCValAssign &VA = RVLocs[i];
8560 assert(VA.isRegLoc() && "Can only return in registers!");
8561
8562 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8563 // Handle returning f64 on LA32D with a soft float ABI.
8564 assert(VA.isRegLoc() && "Expected return via registers");
8565 assert(VA.needsCustom());
8567 DAG.getVTList(MVT::i32, MVT::i32), Val);
8568 SDValue Lo = SplitF64.getValue(0);
8569 SDValue Hi = SplitF64.getValue(1);
8570 Register RegLo = VA.getLocReg();
8571 Register RegHi = RVLocs[++i].getLocReg();
8572
8573 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8574 Glue = Chain.getValue(1);
8575 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8576 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8577 Glue = Chain.getValue(1);
8578 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8579 } else {
8580 // Handle a 'normal' return.
8581 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8582 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8583
8584 // Guarantee that all emitted copies are stuck together.
8585 Glue = Chain.getValue(1);
8586 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8587 }
8588 }
8589
8590 RetOps[0] = Chain; // Update chain.
8591
8592 // Add the glue node if we have it.
8593 if (Glue.getNode())
8594 RetOps.push_back(Glue);
8595
8596 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8597}
8598
8599// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8600// Note: The following prefixes are excluded:
8601// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8602// as they can be represented using [x]vrepli.[whb]
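// Worked example (illustrative only, following the arithmetic below): a
// 32-bit splat value V == 0x3F800000 (the f32 1.0) matches the 4'b1010 case,
// since V & 0x7E07FFFF == 0x3E000000, and produces RequiredImm == 0x1A70
// (imm[12:8] == 0b11010, imm[7:0] == 0x70).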
8604 const APInt &SplatValue, const unsigned SplatBitSize) const {
8605 uint64_t RequiredImm = 0;
8606 uint64_t V = SplatValue.getZExtValue();
8607 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8608 // 4'b0101
8609 RequiredImm = (0b10101 << 8) | (V >> 8);
8610 return {true, RequiredImm};
8611 } else if (SplatBitSize == 32) {
8612 // 4'b0001
8613 if (!(V & 0xFFFF00FF)) {
8614 RequiredImm = (0b10001 << 8) | (V >> 8);
8615 return {true, RequiredImm};
8616 }
8617 // 4'b0010
8618 if (!(V & 0xFF00FFFF)) {
8619 RequiredImm = (0b10010 << 8) | (V >> 16);
8620 return {true, RequiredImm};
8621 }
8622 // 4'b0011
8623 if (!(V & 0x00FFFFFF)) {
8624 RequiredImm = (0b10011 << 8) | (V >> 24);
8625 return {true, RequiredImm};
8626 }
8627 // 4'b0110
8628 if ((V & 0xFFFF00FF) == 0xFF) {
8629 RequiredImm = (0b10110 << 8) | (V >> 8);
8630 return {true, RequiredImm};
8631 }
8632 // 4'b0111
8633 if ((V & 0xFF00FFFF) == 0xFFFF) {
8634 RequiredImm = (0b10111 << 8) | (V >> 16);
8635 return {true, RequiredImm};
8636 }
8637 // 4'b1010
8638 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8639 RequiredImm =
8640 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8641 return {true, RequiredImm};
8642 }
8643 } else if (SplatBitSize == 64) {
8644 // 4'b1011
8645 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8646 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8647 RequiredImm =
8648 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8649 return {true, RequiredImm};
8650 }
8651 // 4'b1100
8652 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8653 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8654 RequiredImm =
8655 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8656 return {true, RequiredImm};
8657 }
8658 // 4'b1001
8659 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8660 uint8_t res = 0;
8661 for (int i = 0; i < 8; ++i) {
8662 uint8_t byte = x & 0xFF;
8663 if (byte == 0 || byte == 0xFF)
8664 res |= ((byte & 1) << i);
8665 else
8666 return {false, 0};
8667 x >>= 8;
8668 }
8669 return {true, res};
8670 };
8671 auto [IsSame, Suffix] = sameBitsPreByte(V);
8672 if (IsSame) {
8673 RequiredImm = (0b11001 << 8) | Suffix;
8674 return {true, RequiredImm};
8675 }
8676 }
8677 return {false, RequiredImm};
8678}
8679
8681 EVT VT) const {
8682 if (!Subtarget.hasExtLSX())
8683 return false;
8684
8685 if (VT == MVT::f32) {
8686 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8687 return (masked == 0x3e000000 || masked == 0x40000000);
8688 }
8689
8690 if (VT == MVT::f64) {
8691 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8692 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8693 }
8694
8695 return false;
8696}
8697
8698bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8699 bool ForCodeSize) const {
8700 // TODO: Maybe need more checks here after vector extension is supported.
8701 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8702 return false;
8703 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8704 return false;
8705 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8706}
8707
8709 return true;
8710}
8711
8713 return true;
8714}
8715
8716bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8717 const Instruction *I) const {
8718 if (!Subtarget.is64Bit())
8719 return isa<LoadInst>(I) || isa<StoreInst>(I);
8720
8721 if (isa<LoadInst>(I))
8722 return true;
8723
8724 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8725 // require fences because we can use amswap_db.[w/d].
8726 Type *Ty = I->getOperand(0)->getType();
8727 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8728 unsigned Size = Ty->getIntegerBitWidth();
8729 return (Size == 8 || Size == 16);
8730 }
8731
8732 return false;
8733}
8734
8736 LLVMContext &Context,
8737 EVT VT) const {
8738 if (!VT.isVector())
8739 return getPointerTy(DL);
8741}
8742
8744 EVT VT = Y.getValueType();
8745
8746 if (VT.isVector())
8747 return Subtarget.hasExtLSX() && VT.isInteger();
8748
8749 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
8750}
8751
8753 const CallInst &I,
8754 MachineFunction &MF,
8755 unsigned Intrinsic) const {
8756 switch (Intrinsic) {
8757 default:
8758 return false;
8759 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8760 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8761 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8762 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8763 Info.opc = ISD::INTRINSIC_W_CHAIN;
8764 Info.memVT = MVT::i32;
8765 Info.ptrVal = I.getArgOperand(0);
8766 Info.offset = 0;
8767 Info.align = Align(4);
8770 return true;
8771 // TODO: Add more Intrinsics later.
8772 }
8773}
8774
8775 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
8776 // atomicrmw and/or/xor operations with operands narrower than 32 bits cannot be
8777 // expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
8778 // regression, we need to implement the expansion manually here.
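// As a rough IR-level sketch of the rewrite performed here: an
//   atomicrmw and ptr %p, i8 %v seq_cst
// becomes an i32 atomicrmw on the containing aligned word, with %v
// zero-extended and shifted into byte position and the bits outside the
// element set to ones, so the neighbouring bytes are left unchanged.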
8781
8783 Op == AtomicRMWInst::And) &&
8784 "Unable to expand");
8785 unsigned MinWordSize = 4;
8786
8787 IRBuilder<> Builder(AI);
8788 LLVMContext &Ctx = Builder.getContext();
8789 const DataLayout &DL = AI->getDataLayout();
8790 Type *ValueType = AI->getType();
8791 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8792
8793 Value *Addr = AI->getPointerOperand();
8794 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8795 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8796
8797 Value *AlignedAddr = Builder.CreateIntrinsic(
8798 Intrinsic::ptrmask, {PtrTy, IntTy},
8799 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8800 "AlignedAddr");
8801
8802 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8803 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8804 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8805 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8806 Value *Mask = Builder.CreateShl(
8807 ConstantInt::get(WordType,
8808 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8809 ShiftAmt, "Mask");
8810 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8811 Value *ValOperand_Shifted =
8812 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8813 ShiftAmt, "ValOperand_Shifted");
8814 Value *NewOperand;
8815 if (Op == AtomicRMWInst::And)
8816 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8817 else
8818 NewOperand = ValOperand_Shifted;
8819
8820 AtomicRMWInst *NewAI =
8821 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8822 AI->getOrdering(), AI->getSyncScopeID());
8823
8824 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8825 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8826 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8827 AI->replaceAllUsesWith(FinalOldResult);
8828 AI->eraseFromParent();
8829}
8830
8833 // TODO: Add more AtomicRMWInst operations that need to be extended.
8834
8835 // Since floating-point operations require a non-trivial set of data
8836 // operations, use CmpXChg to expand.
8837 if (AI->isFloatingPointOperation() ||
8843
8844 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8847 AI->getOperation() == AtomicRMWInst::Sub)) {
8849 }
8850
8851 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8852 if (Subtarget.hasLAMCAS()) {
8853 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8857 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8859 }
8860
8861 if (Size == 8 || Size == 16)
8864}
8865
8866static Intrinsic::ID
8868 AtomicRMWInst::BinOp BinOp) {
8869 if (GRLen == 64) {
8870 switch (BinOp) {
8871 default:
8872 llvm_unreachable("Unexpected AtomicRMW BinOp");
8874 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8875 case AtomicRMWInst::Add:
8876 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8877 case AtomicRMWInst::Sub:
8878 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8880 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8882 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8884 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8885 case AtomicRMWInst::Max:
8886 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8887 case AtomicRMWInst::Min:
8888 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8889 // TODO: support other AtomicRMWInst.
8890 }
8891 }
8892
8893 if (GRLen == 32) {
8894 switch (BinOp) {
8895 default:
8896 llvm_unreachable("Unexpected AtomicRMW BinOp");
8898 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8899 case AtomicRMWInst::Add:
8900 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8901 case AtomicRMWInst::Sub:
8902 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8904 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8906 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8908 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8909 case AtomicRMWInst::Max:
8910 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8911 case AtomicRMWInst::Min:
8912 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8913 // TODO: support other AtomicRMWInst.
8914 }
8915 }
8916
8917 llvm_unreachable("Unexpected GRLen\n");
8918}
8919
8922 AtomicCmpXchgInst *CI) const {
8923
8924 if (Subtarget.hasLAMCAS())
8926
8928 if (Size == 8 || Size == 16)
8931}
8932
8934 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8935 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8936 unsigned GRLen = Subtarget.getGRLen();
8937 AtomicOrdering FailOrd = CI->getFailureOrdering();
8938 Value *FailureOrdering =
8939 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8940 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8941 if (GRLen == 64) {
8942 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8943 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8944 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8945 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8946 }
8947 Type *Tys[] = {AlignedAddr->getType()};
8948 Value *Result = Builder.CreateIntrinsic(
8949 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8950 if (GRLen == 64)
8951 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8952 return Result;
8953}
8954
8956 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8957 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8958 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8959 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8960 // mask, as this produces better code than the LL/SC loop emitted by
8961 // int_loongarch_masked_atomicrmw_xchg.
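// For example, `atomicrmw xchg ptr %p, i8 0` becomes an AtomicRMWInst::And of
// the containing aligned word with the inverted element mask (clearing just
// that byte), and an xchg with -1 becomes an AtomicRMWInst::Or with the mask.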
8962 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8965 if (CVal->isZero())
8966 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8967 Builder.CreateNot(Mask, "Inv_Mask"),
8968 AI->getAlign(), Ord);
8969 if (CVal->isMinusOne())
8970 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8971 AI->getAlign(), Ord);
8972 }
8973
8974 unsigned GRLen = Subtarget.getGRLen();
8975 Value *Ordering =
8976 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8977 Type *Tys[] = {AlignedAddr->getType()};
8979 AI->getModule(),
8981
8982 if (GRLen == 64) {
8983 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8984 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8985 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8986 }
8987
8988 Value *Result;
8989
8990 // Must pass the shift amount needed to sign extend the loaded value prior
8991 // to performing a signed comparison for min/max. ShiftAmt is the number of
8992 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8993 // is the number of bits to left+right shift the value in order to
8994 // sign-extend.
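// Concrete example (assuming GRLen == 64): for an i8 element at byte offset 1
// within the aligned word, ShiftAmt == 8 and ValWidth == 8, so
// SextShamt == 64 - 8 - 8 == 48; shifting the loaded word left and then
// arithmetically right by 48 sign-extends exactly that byte for the comparison.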
8995 if (AI->getOperation() == AtomicRMWInst::Min ||
8997 const DataLayout &DL = AI->getDataLayout();
8998 unsigned ValWidth =
8999 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9000 Value *SextShamt =
9001 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9002 Result = Builder.CreateCall(LlwOpScwLoop,
9003 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9004 } else {
9005 Result =
9006 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9007 }
9008
9009 if (GRLen == 64)
9010 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9011 return Result;
9012}
9013
9015 const MachineFunction &MF, EVT VT) const {
9016 VT = VT.getScalarType();
9017
9018 if (!VT.isSimple())
9019 return false;
9020
9021 switch (VT.getSimpleVT().SimpleTy) {
9022 case MVT::f32:
9023 case MVT::f64:
9024 return true;
9025 default:
9026 break;
9027 }
9028
9029 return false;
9030}
9031
9033 const Constant *PersonalityFn) const {
9034 return LoongArch::R4;
9035}
9036
9038 const Constant *PersonalityFn) const {
9039 return LoongArch::R5;
9040}
9041
9042//===----------------------------------------------------------------------===//
9043// Target Optimization Hooks
9044//===----------------------------------------------------------------------===//
9045
9047 const LoongArchSubtarget &Subtarget) {
9048 // The FRECIPE instructions have a relative accuracy of 2^-14.
9049 // IEEE single precision has 23 fraction bits and double precision has 52.
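// Each Newton-Raphson refinement step roughly doubles the number of accurate
// bits, so one step suffices for f32 (2 * 14 > 24) and two for f64 (4 * 14 > 53).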
9050 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9051 return RefinementSteps;
9052}
9053
9055 SelectionDAG &DAG, int Enabled,
9056 int &RefinementSteps,
9057 bool &UseOneConstNR,
9058 bool Reciprocal) const {
9059 if (Subtarget.hasFrecipe()) {
9060 SDLoc DL(Operand);
9061 EVT VT = Operand.getValueType();
9062
9063 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9064 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9065 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9066 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9067 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9068
9069 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9070 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9071
9072 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9073 if (Reciprocal)
9074 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9075
9076 return Estimate;
9077 }
9078 }
9079
9080 return SDValue();
9081}
9082
9084 SelectionDAG &DAG,
9085 int Enabled,
9086 int &RefinementSteps) const {
9087 if (Subtarget.hasFrecipe()) {
9088 SDLoc DL(Operand);
9089 EVT VT = Operand.getValueType();
9090
9091 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9092 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9093 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9094 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9095 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9096
9097 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9098 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9099
9100 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9101 }
9102 }
9103
9104 return SDValue();
9105}
9106
9107//===----------------------------------------------------------------------===//
9108// LoongArch Inline Assembly Support
9109//===----------------------------------------------------------------------===//
9110
9112LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9113 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9114 //
9115 // 'f': A floating-point register (if available).
9116 // 'k': A memory operand whose address is formed by a base register and
9117 // (optionally scaled) index register.
9118 // 'l': A signed 16-bit constant.
9119 // 'm': A memory operand whose address is formed by a base register and
9120 // offset that is suitable for use in instructions with the same
9121 // addressing mode as st.w and ld.w.
9122 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9123 // instruction)
9124 // 'I': A signed 12-bit constant (for arithmetic instructions).
9125 // 'J': Integer zero.
9126 // 'K': An unsigned 12-bit constant (for logic instructions).
9127 // "ZB": An address that is held in a general-purpose register. The offset is
9128 // zero.
9129 // "ZC": A memory operand whose address is formed by a base register and
9130 // offset that is suitable for use in instructions with the same
9131 // addressing mode as ll.w and sc.w.
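// A hypothetical usage sketch (constraint semantics only; operand printing is
// not shown):
//   asm("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(-2048)); // si12 imm
//   asm("ori %0, %1, %2" : "=r"(res) : "r"(a), "K"(0xfff));    // ui12 imm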
9132 if (Constraint.size() == 1) {
9133 switch (Constraint[0]) {
9134 default:
9135 break;
9136 case 'f':
9137 case 'q':
9138 return C_RegisterClass;
9139 case 'l':
9140 case 'I':
9141 case 'J':
9142 case 'K':
9143 return C_Immediate;
9144 case 'k':
9145 return C_Memory;
9146 }
9147 }
9148
9149 if (Constraint == "ZC" || Constraint == "ZB")
9150 return C_Memory;
9151
9152 // 'm' is handled here.
9153 return TargetLowering::getConstraintType(Constraint);
9154}
9155
9156InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9157 StringRef ConstraintCode) const {
9158 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9162 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9163}
9164
9165std::pair<unsigned, const TargetRegisterClass *>
9166LoongArchTargetLowering::getRegForInlineAsmConstraint(
9167 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9168 // First, see if this is a constraint that directly corresponds to a LoongArch
9169 // register class.
9170 if (Constraint.size() == 1) {
9171 switch (Constraint[0]) {
9172 case 'r':
9173 // TODO: Support fixed vectors up to GRLen?
9174 if (VT.isVector())
9175 break;
9176 return std::make_pair(0U, &LoongArch::GPRRegClass);
9177 case 'q':
9178 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9179 case 'f':
9180 if (Subtarget.hasBasicF() && VT == MVT::f32)
9181 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9182 if (Subtarget.hasBasicD() && VT == MVT::f64)
9183 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9184 if (Subtarget.hasExtLSX() &&
9185 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9186 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9187 if (Subtarget.hasExtLASX() &&
9188 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9189 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9190 break;
9191 default:
9192 break;
9193 }
9194 }
9195
9196 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9197 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9198 // constraints while the official register name is prefixed with a '$'. So we
9199 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
9200 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
9201 // case insensitive, so no need to convert the constraint to upper case here.
9202 //
9203 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9204 // decode the usage of register name aliases into their official names. And
9205 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9206 // official register names.
9207 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9208 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9209 bool IsFP = Constraint[2] == 'f';
9210 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9211 std::pair<unsigned, const TargetRegisterClass *> R;
9213 TRI, join_items("", Temp.first, Temp.second), VT);
9214 // Match those names to the widest floating point register type available.
9215 if (IsFP) {
9216 unsigned RegNo = R.first;
9217 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9218 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9219 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9220 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9221 }
9222 }
9223 }
9224 return R;
9225 }
9226
9227 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9228}
9229
9230void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9231 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9232 SelectionDAG &DAG) const {
9233 // Currently only support length 1 constraints.
9234 if (Constraint.size() == 1) {
9235 switch (Constraint[0]) {
9236 case 'l':
9237 // Validate & create a 16-bit signed immediate operand.
9238 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9239 uint64_t CVal = C->getSExtValue();
9240 if (isInt<16>(CVal))
9241 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9242 Subtarget.getGRLenVT()));
9243 }
9244 return;
9245 case 'I':
9246 // Validate & create a 12-bit signed immediate operand.
9247 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9248 uint64_t CVal = C->getSExtValue();
9249 if (isInt<12>(CVal))
9250 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9251 Subtarget.getGRLenVT()));
9252 }
9253 return;
9254 case 'J':
9255 // Validate & create an integer zero operand.
9256 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9257 if (C->getZExtValue() == 0)
9258 Ops.push_back(
9259 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9260 return;
9261 case 'K':
9262 // Validate & create a 12-bit unsigned immediate operand.
9263 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9264 uint64_t CVal = C->getZExtValue();
9265 if (isUInt<12>(CVal))
9266 Ops.push_back(
9267 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9268 }
9269 return;
9270 default:
9271 break;
9272 }
9273 }
9275}
9276
9277#define GET_REGISTER_MATCHER
9278#include "LoongArchGenAsmMatcher.inc"
9279
9282 const MachineFunction &MF) const {
9283 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9284 std::string NewRegName = Name.second.str();
9285 Register Reg = MatchRegisterAltName(NewRegName);
9286 if (!Reg)
9287 Reg = MatchRegisterName(NewRegName);
9288 if (!Reg)
9289 return Reg;
9290 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9291 if (!ReservedRegs.test(Reg))
9292 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9293 StringRef(RegName) + "\"."));
9294 return Reg;
9295}
9296
9298 EVT VT, SDValue C) const {
9299 // TODO: Support vectors.
9300 if (!VT.isScalarInteger())
9301 return false;
9302
9303 // Omit the optimization if the data size exceeds GRLen.
9304 if (VT.getSizeInBits() > Subtarget.getGRLen())
9305 return false;
9306
9307 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9308 const APInt &Imm = ConstNode->getAPIntValue();
9309 // Break MUL into (SLLI + ADD/SUB) or ALSL.
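// e.g. x * 5 == (x << 2) + x (a single alsl.{w/d}) and x * 7 == (x << 3) - x
// (slli + sub); both avoid a multiply.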
9310 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9311 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9312 return true;
9313 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9314 if (ConstNode->hasOneUse() &&
9315 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9316 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9317 return true;
9318 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9319 // in which the immediate has two set bits. Or Break (MUL x, imm)
9320 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9321 // equals to (1 << s0) - (1 << s1).
9322 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9323 unsigned Shifts = Imm.countr_zero();
9324 // Reject immediates which can be composed via a single LUI.
9325 if (Shifts >= 12)
9326 return false;
9327 // Reject multiplications that can be optimized to
9328 // (SLLI (ALSL x, x, 1/2/3/4), s).
9329 APInt ImmPop = Imm.ashr(Shifts);
9330 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9331 return false;
9332 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9333 // since it needs one more instruction than the other 3 cases.
9334 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9335 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9336 (ImmSmall - Imm).isPowerOf2())
9337 return true;
9338 }
9339 }
9340
9341 return false;
9342}
9343
9345 const AddrMode &AM,
9346 Type *Ty, unsigned AS,
9347 Instruction *I) const {
9348 // LoongArch has four basic addressing modes:
9349 // 1. reg
9350 // 2. reg + 12-bit signed offset
9351 // 3. reg + 14-bit signed offset left-shifted by 2
9352 // 4. reg1 + reg2
9353 // TODO: Add more checks after the vector extensions are supported.
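// For illustration: `reg + 2040` is accepted (si12); `reg + 8192` is accepted
// only with UAL (8192 == si14 << 2); `reg1 + reg2` is accepted; but
// `reg1 + reg2 + 4` and `2*reg + 4` are rejected by the checks below.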
9354
9355 // No global is ever allowed as a base.
9356 if (AM.BaseGV)
9357 return false;
9358
9359 // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted by 2
9360 // when the `UAL` feature is available.
9361 if (!isInt<12>(AM.BaseOffs) &&
9362 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9363 return false;
9364
9365 switch (AM.Scale) {
9366 case 0:
9367 // "r+i" or just "i", depending on HasBaseReg.
9368 break;
9369 case 1:
9370 // "r+r+i" is not allowed.
9371 if (AM.HasBaseReg && AM.BaseOffs)
9372 return false;
9373 // Otherwise we have "r+r" or "r+i".
9374 break;
9375 case 2:
9376 // "2*r+r" or "2*r+i" is not allowed.
9377 if (AM.HasBaseReg || AM.BaseOffs)
9378 return false;
9379 // Allow "2*r" as "r+r".
9380 break;
9381 default:
9382 return false;
9383 }
9384
9385 return true;
9386}
9387
9389 return isInt<12>(Imm);
9390}
9391
9393 return isInt<12>(Imm);
9394}
9395
9397 // Zexts are free if they can be combined with a load.
9398 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9399 // poorly with type legalization of compares preferring sext.
9400 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9401 EVT MemVT = LD->getMemoryVT();
9402 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9403 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9404 LD->getExtensionType() == ISD::ZEXTLOAD))
9405 return true;
9406 }
9407
9408 return TargetLowering::isZExtFree(Val, VT2);
9409}
9410
9412 EVT DstVT) const {
9413 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9414}
9415
9417 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9418}
9419
9421 // TODO: Support vectors.
9422 if (Y.getValueType().isVector())
9423 return false;
9424
9425 return !isa<ConstantSDNode>(Y);
9426}
9427
9429 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9430 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9431}
9432
9434 Type *Ty, bool IsSigned) const {
9435 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9436 return true;
9437
9438 return IsSigned;
9439}
9440
9442 // Return false to suppress the unnecessary extensions if the LibCall
9443 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9444 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9445 Type.getSizeInBits() < Subtarget.getGRLen()))
9446 return false;
9447 return true;
9448}
9449
9450 // memcpy and other memory intrinsics typically try to use wider load/store
9451 // operations if the source/dest is aligned and the copy size is large enough.
9452 // We therefore want to align such objects passed to memory intrinsics.
9454 unsigned &MinSize,
9455 Align &PrefAlign) const {
9456 if (!isa<MemIntrinsic>(CI))
9457 return false;
9458
9459 if (Subtarget.is64Bit()) {
9460 MinSize = 8;
9461 PrefAlign = Align(8);
9462 } else {
9463 MinSize = 4;
9464 PrefAlign = Align(4);
9465 }
9466
9467 return true;
9468}
9469
9478
9479bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9480 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9481 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9482 bool IsABIRegCopy = CC.has_value();
9483 EVT ValueVT = Val.getValueType();
9484
9485 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9486 PartVT == MVT::f32) {
9487 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9488 // NaN, and cast to f32.
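// For example, the f16 value 1.0 (bits 0x3C00) is passed as the f32 bit
// pattern 0xFFFF3C00 (a quiet NaN) and truncated back on the receiving side.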
9489 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9490 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9491 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9492 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9493 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9494 Parts[0] = Val;
9495 return true;
9496 }
9497
9498 return false;
9499}
9500
9501SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9502 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9503 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9504 bool IsABIRegCopy = CC.has_value();
9505
9506 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9507 PartVT == MVT::f32) {
9508 SDValue Val = Parts[0];
9509
9510 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9511 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9512 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9513 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9514 return Val;
9515 }
9516
9517 return SDValue();
9518}
9519
9520MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9521 CallingConv::ID CC,
9522 EVT VT) const {
9523 // Use f32 to pass f16.
9524 if (VT == MVT::f16 && Subtarget.hasBasicF())
9525 return MVT::f32;
9526
9528}
9529
9530unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9531 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9532 // Use f32 to pass f16.
9533 if (VT == MVT::f16 && Subtarget.hasBasicF())
9534 return 1;
9535
9537}
9538
9540 SDValue Op, const APInt &OriginalDemandedBits,
9541 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9542 unsigned Depth) const {
9543 EVT VT = Op.getValueType();
9544 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9545 unsigned Opc = Op.getOpcode();
9546 switch (Opc) {
9547 default:
9548 break;
9551 SDValue Src = Op.getOperand(0);
9552 MVT SrcVT = Src.getSimpleValueType();
9553 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9554 unsigned NumElts = SrcVT.getVectorNumElements();
9555
9556 // If we don't need the sign bits at all just return zero.
9557 if (OriginalDemandedBits.countr_zero() >= NumElts)
9558 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9559
9560 // Only demand the vector elements of the sign bits we need.
9561 APInt KnownUndef, KnownZero;
9562 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9563 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9564 TLO, Depth + 1))
9565 return true;
9566
9567 Known.Zero = KnownZero.zext(BitWidth);
9568 Known.Zero.setHighBits(BitWidth - NumElts);
9569
9570 // [X]VMSKLTZ only uses the MSB from each vector element.
9571 KnownBits KnownSrc;
9572 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9573 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9574 Depth + 1))
9575 return true;
9576
9577 if (KnownSrc.One[SrcBits - 1])
9578 Known.One.setLowBits(NumElts);
9579 else if (KnownSrc.Zero[SrcBits - 1])
9580 Known.Zero.setLowBits(NumElts);
9581
9582 // Attempt to avoid multi-use ops if we don't need anything from it.
9584 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9585 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9586 return false;
9587 }
9588 }
9589
9591 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9592}
9593
9595 unsigned Opc = VecOp.getOpcode();
9596
9597 // Assume target opcodes can't be scalarized.
9598 // TODO - do we have any exceptions?
9599 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9600 return false;
9601
9602 // If the vector op is not supported, try to convert to scalar.
9603 EVT VecVT = VecOp.getValueType();
9605 return true;
9606
9607 // If the vector op is supported, but the scalar op is not, the transform may
9608 // not be worthwhile.
9609 EVT ScalarVT = VecVT.getScalarType();
9610 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9611}
9612
9614 unsigned Index) const {
9616 return false;
9617
9618 // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
9619 return Index == 0;
9620}
9621
9623 unsigned Index) const {
9624 EVT EltVT = VT.getScalarType();
9625
9626 // Extracting a scalar FP value from index 0 of a vector is free.
9627 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9628}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match a vector shuffle as a byte rotation.
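As an informal illustration only, here is a hypothetical single-source sketch of what "matching a rotation" means, assuming the convention that result element i takes source element (i + R) mod N; the real helper additionally handles two inputs and byte-level granularity. All names below are invented for the sketch.
// Hypothetical single-source rotation matcher: returns the rotation amount R
// implied by the mask, or -1 if no single rotation explains every defined
// entry. Undef (-1) mask entries match any rotation.
#include <cassert>
#include <vector>

static int matchRotationSketch(const std::vector<int> &Mask) {
  const int N = (int)Mask.size();
  int Rotation = -1;
  for (int I = 0; I != N; ++I) {
    if (Mask[I] < 0)
      continue; // undef entry: compatible with any rotation
    int R = (Mask[I] - I + N) % N; // rotation implied by this entry
    if (Rotation >= 0 && R != Rotation)
      return -1; // entries disagree: not a rotation
    Rotation = R;
  }
  return Rotation < 0 ? 0 : Rotation; // all-undef mask: treat as rotate by 0
}

int main() {
  assert(matchRotationSketch({3, 4, 5, 6, 7, 0, 1, 2}) == 3);
  assert(matchRotationSketch({3, -1, 5, 6, -1, 0, 1, 2}) == 3);
  assert(matchRotationSketch({0, 2, 1, 3}) == -1);
  return 0;
}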
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
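To make the "repeated within each sub-lane" notion concrete, below is a hypothetical single-input sketch (ignoring the two-source case the real helper also handles): a mask over elements split into lanes of LaneElts entries is repeated when every lane applies the same lane-relative pattern, with undef (-1) entries matching anything. Names are assumptions for the sketch.
// Hypothetical illustration of a repeated-shuffle-mask check; not the LLVM
// helper. RepeatedMask receives the common lane-relative pattern on success.
#include <cassert>
#include <cstddef>
#include <vector>

static bool isRepeatedMaskSketch(const std::vector<int> &Mask, size_t LaneElts,
                                 std::vector<int> &RepeatedMask) {
  RepeatedMask.assign(LaneElts, -1);
  for (size_t I = 0, E = Mask.size(); I != E; ++I) {
    if (Mask[I] < 0)
      continue; // undef entry matches any pattern
    size_t Lane = I / LaneElts;
    int LaneRelative = Mask[I] - (int)(Lane * LaneElts);
    if (LaneRelative < 0 || LaneRelative >= (int)LaneElts)
      return false; // element crosses a lane boundary
    int &Slot = RepeatedMask[I % LaneElts];
    if (Slot >= 0 && Slot != LaneRelative)
      return false; // lanes disagree on the pattern
    Slot = LaneRelative;
  }
  return true;
}

int main() {
  std::vector<int> Repeated;
  // v8i32 mask where both 4-element lanes swap adjacent pairs: repeated.
  assert(isRepeatedMaskSketch({1, 0, 3, 2, 5, 4, 7, 6}, 4, Repeated));
  assert((Repeated == std::vector<int>{1, 0, 3, 2}));
  // Second lane uses a different pattern than the first: not repeated.
  assert(!isRepeatedMaskSketch({1, 0, 3, 2, 4, 5, 6, 7}, 4, Repeated));
  return 0;
}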
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
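A minimal standalone sketch of this check, under the assumption that an entry is either undef (-1) or must equal Low, Low + Step, Low + 2*Step, ... in order across the range; the name and vector-based signature are invented for illustration and this is not the LLVM helper itself.
// Hypothetical sketch: Mask[Pos, Pos + Size) is "sequential or undef" if each
// defined entry equals Low + (I - Pos) * Step.
#include <cassert>
#include <vector>

static bool isSequentialOrUndefSketch(const std::vector<int> &Mask,
                                      unsigned Pos, unsigned Size, int Low,
                                      int Step = 1) {
  for (unsigned I = Pos, E = Pos + Size; I != E; ++I, Low += Step)
    if (Mask[I] >= 0 && Mask[I] != Low)
      return false;
  return true;
}

int main() {
  // Elements 4..7 of this mask are 8, undef, 10, 11: sequential from 8.
  std::vector<int> Mask = {0, 1, 2, 3, 8, -1, 10, 11};
  assert(isSequentialOrUndefSketch(Mask, 4, 4, 8));
  // The whole mask is not sequential from 0.
  assert(!isSequentialOrUndefSketch(Mask, 0, 8, 0));
  return 0;
}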
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1432
bool isZero() const
Definition APFloat.h:1445
APInt bitcastToAPInt() const
Definition APFloat.h:1353
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set to 1 the bit whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated with this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
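Illustrative use of the ISD::CondCode helpers above (a minimal sketch; the helper name and the canonicalization it performs are assumptions for the example):

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGen/ValueTypes.h"

  // Sketch: compute the code for !(Y op X); equality comparisons
  // (SETEQ/SETNE) are unaffected by swapping the operands.
  static llvm::ISD::CondCode invertedSwappedCC(llvm::ISD::CondCode CC,
                                               llvm::EVT OpVT) {
    bool Symmetric = llvm::ISD::isIntEqualitySetCC(CC);
    llvm::ISD::CondCode Swapped =
        Symmetric ? CC : llvm::ISD::getSetCCSwappedOperands(CC);
    return llvm::ISD::getSetCCInverse(Swapped, OpVT);
  }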
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
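Illustrative use of getOrInsertDeclaration (a minimal sketch; the choice of llvm.fabs and the helper name are assumptions for the example):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"

  // Sketch: declare (or reuse) llvm.fabs for V's type and emit a call to it.
  static llvm::Value *emitFAbs(llvm::Module *M, llvm::IRBuilder<> &B,
                               llvm::Value *V) {
    llvm::Function *FAbs = llvm::Intrinsic::getOrInsertDeclaration(
        M, llvm::Intrinsic::fabs, {V->getType()});
    return B.CreateCall(FAbs, {V});
  }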
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
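generateInstSeq splits an immediate into the short LU12I.W/ORI/LU32I.D/LU52I.D-style sequence used to materialize it in a GPR. A rough sketch of walking the result (this assumes the LoongArchMatInt::Inst layout with Opc/Imm fields and an in-backend include path; the helper name is illustrative):

  #include "MCTargetDesc/LoongArchMatInt.h"
  #include "llvm/Support/Debug.h"

  // Sketch: report how many instructions are needed to build Val in a GPR.
  static unsigned materializationCost(int64_t Val) {
    llvm::LoongArchMatInt::InstSeq Seq =
        llvm::LoongArchMatInt::generateInstSeq(Val);
    for (const auto &I : Seq)
      llvm::dbgs() << "opc " << I.Opc << " imm " << I.Imm << "\n";
    return Seq.size();
  }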
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
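These RTLIB helpers map an (operand type, result type) pair to a runtime-library call when an FP conversion cannot be lowered inline. A minimal sketch (the wrapper name is hypothetical, and the header is assumed to be the current location of these helpers):

  #include "llvm/CodeGen/RuntimeLibcallUtil.h"
  #include "llvm/CodeGen/ValueTypes.h"

  // Sketch: is there a libcall that converts a signed integer of type OpVT
  // to a floating-point value of type RetVT?
  static bool hasSIntToFPLibcall(llvm::EVT OpVT, llvm::EVT RetVT) {
    llvm::RTLIB::Libcall LC = llvm::RTLIB::getSINTTOFP(OpVT, RetVT);
    return LC != llvm::RTLIB::UNKNOWN_LIBCALL;
  }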
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
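BuildMI is how custom inserters and prologue/epilogue code create MachineInstrs. A minimal sketch of the common overload that inserts before an iterator (the opcode, registers, and helper name are placeholders, not LoongArch-specific):

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"

  // Sketch: emit "DstReg = <Opcode> SrcReg, Imm" right before position I.
  static void emitBefore(llvm::MachineBasicBlock &MBB,
                         llvm::MachineBasicBlock::iterator I,
                         const llvm::DebugLoc &DL,
                         const llvm::TargetInstrInfo &TII, unsigned Opcode,
                         llvm::Register DstReg, llvm::Register SrcReg,
                         int64_t Imm) {
    llvm::BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
        .addReg(SrcReg, llvm::getKillRegState(/*IsKill=*/true))
        .addImm(Imm);
  }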
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
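widenShuffleMaskElts returns false when the mask cannot be regrouped, so callers check the result before using the scaled mask; a minimal sketch (the helper name is illustrative):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"

  // Sketch: try to view a mask over N elements as a mask over N/2 elements
  // that are twice as wide, e.g. {0,1, 4,5, 2,3, 6,7} becomes {0, 2, 1, 3}.
  static bool tryWidenMaskByTwo(llvm::ArrayRef<int> Mask,
                                llvm::SmallVectorImpl<int> &Widened) {
    return llvm::widenShuffleMaskElts(/*Scale=*/2, Mask, Widened);
  }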
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
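These MathExtras predicates are the usual immediate-encoding checks: does a constant fit a signed or unsigned field, is it a power of two, is it a contiguous mask. A small self-checking sketch with example values (the function name is illustrative):

  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"

  static bool exampleEncodingChecks() {
    bool Ok = llvm::isInt<12>(-2048) &&      // fits a signed 12-bit field
              llvm::isUInt<5>(31);           // fits an unsigned 5-bit field
    if (llvm::isPowerOf2_64(0x1000))         // power of two -> use a shift
      Ok &= (llvm::Log2_64(0x1000) == 12);
    Ok &= llvm::isMask_64(0xFF);             // contiguous ones from bit 0
    Ok &= llvm::isShiftedMask_64(0x0FF0);    // contiguous ones, shifted up
    Ok &= (llvm::bit_width(255u) == 8);      // bits needed to represent 255
    return Ok;
  }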
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
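isa/dyn_cast/cast are the LLVM RTTI queries used on SDNodes and IR values alike; a minimal sketch on an SDValue operand (the helper name is illustrative):

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  #include "llvm/Support/Casting.h"

  // Sketch: read an operand's value if it is a ConstantSDNode; dyn_cast
  // returns null on mismatch, while cast<> would assert.
  static bool matchConstantOperand(llvm::SDValue Op, uint64_t &Imm) {
    if (auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(Op)) {
      Imm = C->getZExtValue();
      return true;
    }
    return false;
  }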
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
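The constant predicates above are often combined with peekThroughBitcasts when matching operands; a minimal sketch (the classification scheme and function name are illustrative):

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  // Sketch: classify an operand as 0, 1, all-ones, or "other" after
  // stripping any bitcasts around it.
  static int classifyConstant(llvm::SDValue V) {
    V = llvm::peekThroughBitcasts(V);
    if (llvm::isNullConstant(V))
      return 0;
    if (llvm::isOneConstant(V))
      return 1;
    if (llvm::isAllOnesConstant(V))
      return -1;
    return 2; // not a recognized integer constant
  }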
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
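The EVT queries above drive most custom-lowering decisions: vector vs. scalar, LSX (128-bit) vs. LASX (256-bit) width, lane count, and the matching integer type. A minimal sketch (a free function with an illustrative name, not one of the target's hooks):

  #include "llvm/CodeGen/ValueTypes.h"

  // Sketch: report the vector width class of VT and return an integer EVT
  // covering the same number of bits (per lane for vectors).
  static llvm::EVT integerEquivalent(llvm::LLVMContext &Ctx, llvm::EVT VT,
                                     bool &IsLSX, bool &IsLASX) {
    IsLSX = VT.is128BitVector();
    IsLASX = VT.is256BitVector();
    if (VT.isVector())
      return VT.changeVectorElementTypeToInteger();  // same lane count
    return llvm::EVT::getIntegerVT(Ctx, VT.getFixedSizeInBits());
  }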
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
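MachinePointerInfo records what a memory operand may alias; the getFixedStack form is the one used when reloading arguments from their frame slots. A minimal sketch of building such a load (the helper name is hypothetical):

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLowering.h"

  // Sketch: load a value of type VT from frame index FI.
  static llvm::SDValue loadFromFrameIndex(llvm::SelectionDAG &DAG,
                                          const llvm::SDLoc &DL, llvm::EVT VT,
                                          llvm::SDValue Chain, int FI) {
    llvm::MachineFunction &MF = DAG.getMachineFunction();
    llvm::EVT PtrVT =
        DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout());
    llvm::SDValue Addr = DAG.getFrameIndex(FI, PtrVT);
    return DAG.getLoad(VT, DL, Chain, Addr,
                       llvm::MachinePointerInfo::getFixedStack(MF, FI));
  }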
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...