1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "loongarch-isel-lowering"
42
43STATISTIC(NumTailCalls, "Number of tail calls");
44
53
55 "loongarch-materialize-float-imm", cl::Hidden,
56 cl::desc("Maximum number of instructions used (including code sequence "
57 "to generate the value and moving the value to FPR) when "
58 "materializing floating-point immediates (default = 3)"),
60 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62 "Materialize FP immediate within 2 instructions"),
64 "Materialize FP immediate within 3 instructions"),
66 "Materialize FP immediate within 4 instructions"),
68 "Materialize FP immediate within 5 instructions"),
70 "Materialize FP immediate within 6 instructions "
71 "(behaves same as 5 on loongarch64)")));
72
73static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
74 cl::desc("Trap on integer division by zero."),
75 cl::init(false));
76
78 const LoongArchSubtarget &STI)
79 : TargetLowering(TM), Subtarget(STI) {
80
81 MVT GRLenVT = Subtarget.getGRLenVT();
82
83 // Set up the register classes.
84
85 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
86 if (Subtarget.hasBasicF())
87 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
88 if (Subtarget.hasBasicD())
89 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
90
91 static const MVT::SimpleValueType LSXVTs[] = {
92 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
93 static const MVT::SimpleValueType LASXVTs[] = {
94 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
95
96 if (Subtarget.hasExtLSX())
97 for (MVT VT : LSXVTs)
98 addRegisterClass(VT, &LoongArch::LSX128RegClass);
99
100 if (Subtarget.hasExtLASX())
101 for (MVT VT : LASXVTs)
102 addRegisterClass(VT, &LoongArch::LASX256RegClass);
103
104 // Set operations for LA32 and LA64.
105
107 MVT::i1, Promote);
108
115
118 GRLenVT, Custom);
119
121
122 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
123 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
126
127 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
128 setOperationAction(ISD::TRAP, MVT::Other, Legal);
129
133
134 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
135
136 // BITREV/REVB requires the 32S feature.
137 if (STI.has32S()) {
138 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
139 // we get to know which of sll and revb.2h is faster.
142
143 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
144 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
145 // and i32 could still be byte-swapped relatively cheaply.
147 } else {
155 }
156
157 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
159 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
163
166
167 // Set operations for LA64 only.
168
169 if (Subtarget.is64Bit()) {
176 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
187
191 Custom);
192 setOperationAction(ISD::LROUND, MVT::i32, Custom);
193 }
194
195 // Set operations for LA32 only.
196
197 if (!Subtarget.is64Bit()) {
203 if (Subtarget.hasBasicD())
204 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
205 }
206
207 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
208
209 static const ISD::CondCode FPCCToExpand[] = {
212
213 // Set operations for 'F' feature.
214
215 if (Subtarget.hasBasicF()) {
216 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
217 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
218 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
219 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
220 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
221
224 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
226 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
227 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
228 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
229 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
234 setOperationAction(ISD::FSIN, MVT::f32, Expand);
235 setOperationAction(ISD::FCOS, MVT::f32, Expand);
236 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
237 setOperationAction(ISD::FPOW, MVT::f32, Expand);
239 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
240 Subtarget.isSoftFPABI() ? LibCall : Custom);
241 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
242 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
244 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
245 Subtarget.isSoftFPABI() ? LibCall : Custom);
246
247 if (Subtarget.is64Bit())
248 setOperationAction(ISD::FRINT, MVT::f32, Legal);
249
250 if (!Subtarget.hasBasicD()) {
252 if (Subtarget.is64Bit()) {
255 }
256 }
257 }
258
259 // Set operations for 'D' feature.
260
261 if (Subtarget.hasBasicD()) {
262 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
265 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
268 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
269
272 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
276 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
277 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
278 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
280 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
282 setOperationAction(ISD::FSIN, MVT::f64, Expand);
283 setOperationAction(ISD::FCOS, MVT::f64, Expand);
284 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
285 setOperationAction(ISD::FPOW, MVT::f64, Expand);
287 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
288 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
289 Subtarget.isSoftFPABI() ? LibCall : Custom);
290 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
291 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
292 Subtarget.isSoftFPABI() ? LibCall : Custom);
293
294 if (Subtarget.is64Bit())
295 setOperationAction(ISD::FRINT, MVT::f64, Legal);
296 }
297
298 // Set operations for 'LSX' feature.
299
300 if (Subtarget.hasExtLSX()) {
302 // Expand all truncating stores and extending loads.
303 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
304 setTruncStoreAction(VT, InnerVT, Expand);
307 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
308 }
309 // By default everything must be expanded. Then we will selectively turn
310 // on ones that can be effectively codegen'd.
311 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
313 }
314
315 for (MVT VT : LSXVTs) {
316 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
317 setOperationAction(ISD::BITCAST, VT, Legal);
319
323
328 }
329 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
332 Legal);
334 VT, Legal);
341 Expand);
350 }
351 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
353 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
355 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
358 }
359 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
363 setOperationAction(ISD::FSQRT, VT, Legal);
364 setOperationAction(ISD::FNEG, VT, Legal);
367 VT, Expand);
369 }
371 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
372 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
373 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
374 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
375
376 for (MVT VT :
377 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
378 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
380 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
381 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
382 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
383 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
384 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
385 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
386 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
387 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
388 }
389 }
390
391 // Set operations for 'LASX' feature.
392
393 if (Subtarget.hasExtLASX()) {
394 for (MVT VT : LASXVTs) {
395 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
396 setOperationAction(ISD::BITCAST, VT, Legal);
398
404
408 }
409 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
412 Legal);
414 VT, Legal);
421 Expand);
430 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
431 }
432 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
434 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
436 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
439 }
440 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
444 setOperationAction(ISD::FSQRT, VT, Legal);
445 setOperationAction(ISD::FNEG, VT, Legal);
448 VT, Expand);
450 }
451 }
452
453 // Set DAG combine for LA32 and LA64.
454
459
460 // Set DAG combine for 'LSX' feature.
461
462 if (Subtarget.hasExtLSX()) {
464 setTargetDAGCombine(ISD::BITCAST);
465 }
466
467 // Set DAG combine for 'LASX' feature.
468
469 if (Subtarget.hasExtLASX())
471
472 // Compute derived properties from the register classes.
473 computeRegisterProperties(Subtarget.getRegisterInfo());
474
476
479
480 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
481
483
484 // Function alignments.
486 // Set preferred alignments.
487 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
488 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
489 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
490
491 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
492 if (Subtarget.hasLAMCAS())
494
495 if (Subtarget.hasSCQ()) {
497 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
498 }
499}
500
502 const GlobalAddressSDNode *GA) const {
503 // In order to maximise the opportunity for common subexpression elimination,
504 // keep a separate ADD node for the global address offset instead of folding
505 // it in the global address node. Later peephole optimisations may choose to
506 // fold it back in when profitable.
507 return false;
508}
509
511 SelectionDAG &DAG) const {
512 switch (Op.getOpcode()) {
513 case ISD::ATOMIC_FENCE:
514 return lowerATOMIC_FENCE(Op, DAG);
516 return lowerEH_DWARF_CFA(Op, DAG);
518 return lowerGlobalAddress(Op, DAG);
520 return lowerGlobalTLSAddress(Op, DAG);
522 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
524 return lowerINTRINSIC_W_CHAIN(Op, DAG);
526 return lowerINTRINSIC_VOID(Op, DAG);
528 return lowerBlockAddress(Op, DAG);
529 case ISD::JumpTable:
530 return lowerJumpTable(Op, DAG);
531 case ISD::SHL_PARTS:
532 return lowerShiftLeftParts(Op, DAG);
533 case ISD::SRA_PARTS:
534 return lowerShiftRightParts(Op, DAG, true);
535 case ISD::SRL_PARTS:
536 return lowerShiftRightParts(Op, DAG, false);
538 return lowerConstantPool(Op, DAG);
539 case ISD::FP_TO_SINT:
540 return lowerFP_TO_SINT(Op, DAG);
541 case ISD::BITCAST:
542 return lowerBITCAST(Op, DAG);
543 case ISD::UINT_TO_FP:
544 return lowerUINT_TO_FP(Op, DAG);
545 case ISD::SINT_TO_FP:
546 return lowerSINT_TO_FP(Op, DAG);
547 case ISD::VASTART:
548 return lowerVASTART(Op, DAG);
549 case ISD::FRAMEADDR:
550 return lowerFRAMEADDR(Op, DAG);
551 case ISD::RETURNADDR:
552 return lowerRETURNADDR(Op, DAG);
554 return lowerWRITE_REGISTER(Op, DAG);
556 return lowerINSERT_VECTOR_ELT(Op, DAG);
558 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
560 return lowerBUILD_VECTOR(Op, DAG);
562 return lowerCONCAT_VECTORS(Op, DAG);
564 return lowerVECTOR_SHUFFLE(Op, DAG);
565 case ISD::BITREVERSE:
566 return lowerBITREVERSE(Op, DAG);
568 return lowerSCALAR_TO_VECTOR(Op, DAG);
569 case ISD::PREFETCH:
570 return lowerPREFETCH(Op, DAG);
571 case ISD::SELECT:
572 return lowerSELECT(Op, DAG);
573 case ISD::BRCOND:
574 return lowerBRCOND(Op, DAG);
575 case ISD::FP_TO_FP16:
576 return lowerFP_TO_FP16(Op, DAG);
577 case ISD::FP16_TO_FP:
578 return lowerFP16_TO_FP(Op, DAG);
579 case ISD::FP_TO_BF16:
580 return lowerFP_TO_BF16(Op, DAG);
581 case ISD::BF16_TO_FP:
582 return lowerBF16_TO_FP(Op, DAG);
583 case ISD::VECREDUCE_ADD:
584 return lowerVECREDUCE_ADD(Op, DAG);
585 case ISD::VECREDUCE_AND:
586 case ISD::VECREDUCE_OR:
587 case ISD::VECREDUCE_XOR:
588 case ISD::VECREDUCE_SMAX:
589 case ISD::VECREDUCE_SMIN:
590 case ISD::VECREDUCE_UMAX:
591 case ISD::VECREDUCE_UMIN:
592 return lowerVECREDUCE(Op, DAG);
593 case ISD::ConstantFP:
594 return lowerConstantFP(Op, DAG);
595 }
596 return SDValue();
597}
598
599SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
600 SelectionDAG &DAG) const {
601 EVT VT = Op.getValueType();
603 const APFloat &FPVal = CFP->getValueAPF();
604 SDLoc DL(CFP);
605
606 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
607 (VT == MVT::f64 && Subtarget.hasBasicD()));
608
609 // If value is 0.0 or -0.0, just ignore it.
610 if (FPVal.isZero())
611 return SDValue();
612
613 // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
614 if (isFPImmVLDILegal(FPVal, VT))
615 return SDValue();
616
617 // Construct as integer, and move to float register.
618 APInt INTVal = FPVal.bitcastToAPInt();
619
620 // If more than MaterializeFPImmInsNum instructions would be needed to
621 // generate INTVal and move it to a float register, fall back to a
622 // floating-point load from the constant pool.
624 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
625 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
626 return SDValue();
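  // Illustrative sketch (not from the original source): on LA64 with the
  // default limit of 3, f32 2.5 (bit pattern 0x40200000) can be built with a
  // single LU12I.W plus one GPR-to-FPR move, i.e. InsNum == 2, so it is
  // materialized here rather than loaded from the constant pool. The exact
  // integer sequence is produced by LoongArchMatInt.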
627
628 switch (VT.getSimpleVT().SimpleTy) {
629 default:
630 llvm_unreachable("Unexpected floating point type!");
631 break;
632 case MVT::f32: {
633 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
634 if (Subtarget.is64Bit())
635 NewVal = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, NewVal);
636 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
638 DL, VT, NewVal);
639 }
640 case MVT::f64: {
641 if (Subtarget.is64Bit()) {
642 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
643 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
644 }
645 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
646 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
647 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
648 }
649 }
650
651 return SDValue();
652}
653
654// Lower vecreduce_add using vhaddw instructions.
655// For Example:
656// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
657// can be lowered to:
658// VHADDW_D_W vr0, vr0, vr0
659// VHADDW_Q_D vr0, vr0, vr0
660// VPICKVE2GR_D a0, vr0, 0
661// ADDI_W a0, a0, 0
662SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
663 SelectionDAG &DAG) const {
664
665 SDLoc DL(Op);
666 MVT OpVT = Op.getSimpleValueType();
667 SDValue Val = Op.getOperand(0);
668
669 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
670 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
671 unsigned ResBits = OpVT.getScalarSizeInBits();
672
673 unsigned LegalVecSize = 128;
674 bool isLASX256Vector =
675 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
676
677 // Ensure the operand type is legal, widening the vector until it is.
678 while (!isTypeLegal(Val.getSimpleValueType())) {
679 Val = DAG.WidenVector(Val, DL);
680 }
681
682 // NumEles determines the iteration count; v4i32 for LSX and v8i32 for
683 // LASX should use the same count.
684 if (isLASX256Vector) {
685 NumEles /= 2;
686 LegalVecSize = 256;
687 }
688
689 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
690 MVT IntTy = MVT::getIntegerVT(EleBits);
691 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
692 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
693 }
694
695 if (isLASX256Vector) {
696 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
697 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
698 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
699 }
700
701 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
702 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
703 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
704}
705
706// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
707// For Example:
708// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
709// can be lowered to:
710// VBSRL_V vr1, vr0, 8
711// VMAX_W vr0, vr1, vr0
712// VBSRL_V vr1, vr0, 4
713// VMAX_W vr0, vr1, vr0
714// VPICKVE2GR_W a0, vr0, 0
715 // A 256-bit vector is illegal here and is split into two 128-bit
716 // vectors by default, then processed by this lowering.
717SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
718 SelectionDAG &DAG) const {
719 SDLoc DL(Op);
720
721 MVT OpVT = Op.getSimpleValueType();
722 SDValue Val = Op.getOperand(0);
723
724 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
725 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
726
727 // Ensure the operand type is legal, widening the vector until it is.
728 while (!isTypeLegal(Val.getSimpleValueType())) {
729 Val = DAG.WidenVector(Val, DL);
730 }
731
732 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
733 MVT VecTy = Val.getSimpleValueType();
734 MVT GRLenVT = Subtarget.getGRLenVT();
735
736 for (int i = NumEles; i > 1; i /= 2) {
737 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
738 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
739 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
740 }
741
742 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
743 DAG.getConstant(0, DL, GRLenVT));
744}
745
746SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
747 SelectionDAG &DAG) const {
748 unsigned IsData = Op.getConstantOperandVal(4);
749
750 // We don't support non-data prefetch.
751 // Just preserve the chain.
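  // For example (illustrative, not from the original source):
  //   call void @llvm.prefetch(ptr %p, i32 0, i32 3, i32 0)
  // carries a cache-type operand of 0 (instruction cache), so only the chain
  // is returned; a cache-type of 1 (data cache) keeps the PREFETCH node.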
752 if (!IsData)
753 return Op.getOperand(0);
754
755 return Op;
756}
757
758// Return true if Val is equal to (setcc LHS, RHS, CC).
759// Return false if Val is the inverse of (setcc LHS, RHS, CC).
760// Otherwise, return std::nullopt.
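// For example (illustrative): with LHS = a, RHS = b and CC = SETLT,
//   Val = (setcc a, b, setlt) yields true,
//   Val = (setcc a, b, setge) yields false (the inverse condition),
// and an unrelated setcc yields std::nullopt.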
761static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
762 ISD::CondCode CC, SDValue Val) {
763 assert(Val->getOpcode() == ISD::SETCC);
764 SDValue LHS2 = Val.getOperand(0);
765 SDValue RHS2 = Val.getOperand(1);
766 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
767
768 if (LHS == LHS2 && RHS == RHS2) {
769 if (CC == CC2)
770 return true;
771 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
772 return false;
773 } else if (LHS == RHS2 && RHS == LHS2) {
775 if (CC == CC2)
776 return true;
777 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
778 return false;
779 }
780
781 return std::nullopt;
782}
783
785 const LoongArchSubtarget &Subtarget) {
786 SDValue CondV = N->getOperand(0);
787 SDValue TrueV = N->getOperand(1);
788 SDValue FalseV = N->getOperand(2);
789 MVT VT = N->getSimpleValueType(0);
790 SDLoc DL(N);
791
792 // (select c, -1, y) -> -c | y
793 if (isAllOnesConstant(TrueV)) {
794 SDValue Neg = DAG.getNegative(CondV, DL, VT);
795 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
796 }
797 // (select c, y, -1) -> (c-1) | y
798 if (isAllOnesConstant(FalseV)) {
799 SDValue Neg =
800 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
801 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
802 }
803
804 // (select c, 0, y) -> (c-1) & y
805 if (isNullConstant(TrueV)) {
806 SDValue Neg =
807 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
808 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
809 }
810 // (select c, y, 0) -> -c & y
811 if (isNullConstant(FalseV)) {
812 SDValue Neg = DAG.getNegative(CondV, DL, VT);
813 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
814 }
815
816 // select c, ~x, x --> xor -c, x
817 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
818 const APInt &TrueVal = TrueV->getAsAPIntVal();
819 const APInt &FalseVal = FalseV->getAsAPIntVal();
820 if (~TrueVal == FalseVal) {
821 SDValue Neg = DAG.getNegative(CondV, DL, VT);
822 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
823 }
824 }
825
826 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
827 // when both truev and falsev are also setcc.
828 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
829 FalseV.getOpcode() == ISD::SETCC) {
830 SDValue LHS = CondV.getOperand(0);
831 SDValue RHS = CondV.getOperand(1);
832 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
833
834 // (select x, x, y) -> x | y
835 // (select !x, x, y) -> x & y
836 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
837 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
838 DAG.getFreeze(FalseV));
839 }
840 // (select x, y, x) -> x & y
841 // (select !x, y, x) -> x | y
842 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
843 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
844 DAG.getFreeze(TrueV), FalseV);
845 }
846 }
847
848 return SDValue();
849}
850
851// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
852// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
853// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
854// being `0` or `-1`. In such cases we can replace `select` with `and`.
855// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
856// than `c0`?
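// A worked example (illustrative, not from the original source):
//   (add (select cond, x, -5), 5)
// becomes
//   (select cond, (add x, 5), 0)
// since binOp(c0, c1) = add(-5, 5) = 0; the resulting select with a zero arm
// can later be turned into an AND by combineSelectToBinOp.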
857static SDValue
859 const LoongArchSubtarget &Subtarget) {
860 unsigned SelOpNo = 0;
861 SDValue Sel = BO->getOperand(0);
862 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
863 SelOpNo = 1;
864 Sel = BO->getOperand(1);
865 }
866
867 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
868 return SDValue();
869
870 unsigned ConstSelOpNo = 1;
871 unsigned OtherSelOpNo = 2;
872 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
873 ConstSelOpNo = 2;
874 OtherSelOpNo = 1;
875 }
876 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
877 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
878 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
879 return SDValue();
880
881 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
882 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
883 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
884 return SDValue();
885
886 SDLoc DL(Sel);
887 EVT VT = BO->getValueType(0);
888
889 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
890 if (SelOpNo == 1)
891 std::swap(NewConstOps[0], NewConstOps[1]);
892
893 SDValue NewConstOp =
894 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
895 if (!NewConstOp)
896 return SDValue();
897
898 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
899 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
900 return SDValue();
901
902 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
903 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
904 if (SelOpNo == 1)
905 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
906 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
907
908 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
909 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
910 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
911}
912
913// Changes the condition code and swaps operands if necessary, so the SetCC
914// operation matches one of the comparisons supported directly by branches
915// in the LoongArch ISA. May adjust compares to favor compare with 0 over
916// compare with 1/-1.
918 ISD::CondCode &CC, SelectionDAG &DAG) {
919 // If this is a single bit test that can't be handled by ANDI, shift the
920 // bit to be tested to the MSB and perform a signed compare with 0.
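  // For example (illustrative): on LA64, (and x, 0x800) == 0 cannot use ANDI
  // because 0x800 does not fit in a signed 12-bit immediate, so x is shifted
  // left by 52 to move bit 11 into the sign position and the test becomes a
  // signed compare with 0 (SETGE).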
921 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
922 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
923 isa<ConstantSDNode>(LHS.getOperand(1))) {
924 uint64_t Mask = LHS.getConstantOperandVal(1);
925 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
926 unsigned ShAmt = 0;
927 if (isPowerOf2_64(Mask)) {
928 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
929 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
930 } else {
931 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
932 }
933
934 LHS = LHS.getOperand(0);
935 if (ShAmt != 0)
936 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
937 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
938 return;
939 }
940 }
941
942 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
943 int64_t C = RHSC->getSExtValue();
944 switch (CC) {
945 default:
946 break;
947 case ISD::SETGT:
948 // Convert X > -1 to X >= 0.
949 if (C == -1) {
950 RHS = DAG.getConstant(0, DL, RHS.getValueType());
951 CC = ISD::SETGE;
952 return;
953 }
954 break;
955 case ISD::SETLT:
956 // Convert X < 1 to 0 >= X.
957 if (C == 1) {
958 RHS = LHS;
959 LHS = DAG.getConstant(0, DL, RHS.getValueType());
960 CC = ISD::SETGE;
961 return;
962 }
963 break;
964 }
965 }
966
967 switch (CC) {
968 default:
969 break;
970 case ISD::SETGT:
971 case ISD::SETLE:
972 case ISD::SETUGT:
973 case ISD::SETULE:
975 std::swap(LHS, RHS);
976 break;
977 }
978}
979
980SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
981 SelectionDAG &DAG) const {
982 SDValue CondV = Op.getOperand(0);
983 SDValue TrueV = Op.getOperand(1);
984 SDValue FalseV = Op.getOperand(2);
985 SDLoc DL(Op);
986 MVT VT = Op.getSimpleValueType();
987 MVT GRLenVT = Subtarget.getGRLenVT();
988
989 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
990 return V;
991
992 if (Op.hasOneUse()) {
993 unsigned UseOpc = Op->user_begin()->getOpcode();
994 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
995 SDNode *BinOp = *Op->user_begin();
996 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
997 DAG, Subtarget)) {
998 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
999 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1000 // may return a constant node and cause a crash in lowerSELECT.
1001 if (NewSel.getOpcode() == ISD::SELECT)
1002 return lowerSELECT(NewSel, DAG);
1003 return NewSel;
1004 }
1005 }
1006 }
1007
1008 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1009 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1010 // (select condv, truev, falsev)
1011 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1012 if (CondV.getOpcode() != ISD::SETCC ||
1013 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1014 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1015 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1016
1017 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1018
1019 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1020 }
1021
1022 // If the CondV is the output of a SETCC node which operates on GRLenVT
1023 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1024 // to take advantage of the integer compare+branch instructions. i.e.: (select
1025 // (setcc lhs, rhs, cc), truev, falsev)
1026 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1027 SDValue LHS = CondV.getOperand(0);
1028 SDValue RHS = CondV.getOperand(1);
1029 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1030
1031 // Special case for a select of 2 constants that have a difference of 1.
1032 // Normally this is done by DAGCombine, but if the select is introduced by
1033 // type legalization or op legalization, we miss it. Restricting to SETLT
1034 // case for now because that is what signed saturating add/sub need.
1035 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1036 // but we would probably want to swap the true/false values if the condition
1037 // is SETGE/SETLE to avoid an XORI.
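  // For example (illustrative): (select (setlt a, b), 5, 4) becomes
  // (add (setlt a, b), 4), because the setcc result is 0 or 1 and
  // TrueVal - 1 == FalseVal.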
1038 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1039 CCVal == ISD::SETLT) {
1040 const APInt &TrueVal = TrueV->getAsAPIntVal();
1041 const APInt &FalseVal = FalseV->getAsAPIntVal();
1042 if (TrueVal - 1 == FalseVal)
1043 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1044 if (TrueVal + 1 == FalseVal)
1045 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1046 }
1047
1048 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1049 // 1 < x ? x : 1 -> 0 < x ? x : 1
1050 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1051 RHS == TrueV && LHS == FalseV) {
1052 LHS = DAG.getConstant(0, DL, VT);
1053 // 0 <u x is the same as x != 0.
1054 if (CCVal == ISD::SETULT) {
1055 std::swap(LHS, RHS);
1056 CCVal = ISD::SETNE;
1057 }
1058 }
1059
1060 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1061 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1062 RHS == FalseV) {
1063 RHS = DAG.getConstant(0, DL, VT);
1064 }
1065
1066 SDValue TargetCC = DAG.getCondCode(CCVal);
1067
1068 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1069 // (select (setcc lhs, rhs, CC), constant, falsev)
1070 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1071 std::swap(TrueV, FalseV);
1072 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1073 }
1074
1075 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1076 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1077}
1078
1079SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1080 SelectionDAG &DAG) const {
1081 SDValue CondV = Op.getOperand(1);
1082 SDLoc DL(Op);
1083 MVT GRLenVT = Subtarget.getGRLenVT();
1084
1085 if (CondV.getOpcode() == ISD::SETCC) {
1086 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1087 SDValue LHS = CondV.getOperand(0);
1088 SDValue RHS = CondV.getOperand(1);
1089 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1090
1091 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1092
1093 SDValue TargetCC = DAG.getCondCode(CCVal);
1094 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1095 Op.getOperand(0), LHS, RHS, TargetCC,
1096 Op.getOperand(2));
1097 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1098 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1099 Op.getOperand(0), CondV, Op.getOperand(2));
1100 }
1101 }
1102
1103 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1104 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1105 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1106}
1107
1108SDValue
1109LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1110 SelectionDAG &DAG) const {
1111 SDLoc DL(Op);
1112 MVT OpVT = Op.getSimpleValueType();
1113
1114 SDValue Vector = DAG.getUNDEF(OpVT);
1115 SDValue Val = Op.getOperand(0);
1116 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1117
1118 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1119}
1120
1121SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1122 SelectionDAG &DAG) const {
1123 EVT ResTy = Op->getValueType(0);
1124 SDValue Src = Op->getOperand(0);
1125 SDLoc DL(Op);
1126
1127 // LoongArchISD::BITREV_8B is not supported on LA32.
1128 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1129 return SDValue();
1130
1131 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1132 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1133 unsigned int NewEltNum = NewVT.getVectorNumElements();
1134
1135 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1136
1138 for (unsigned int i = 0; i < NewEltNum; i++) {
1139 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1140 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1141 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1142 ? (unsigned)LoongArchISD::BITREV_8B
1143 : (unsigned)ISD::BITREVERSE;
1144 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1145 }
1146 SDValue Res =
1147 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1148
1149 switch (ResTy.getSimpleVT().SimpleTy) {
1150 default:
1151 return SDValue();
1152 case MVT::v16i8:
1153 case MVT::v32i8:
1154 return Res;
1155 case MVT::v8i16:
1156 case MVT::v16i16:
1157 case MVT::v4i32:
1158 case MVT::v8i32: {
1160 for (unsigned int i = 0; i < NewEltNum; i++)
1161 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1162 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1163 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1164 }
1165 }
1166}
1167
1168// Widen element type to get a new mask value (if possible).
1169// For example:
1170// shufflevector <4 x i32> %a, <4 x i32> %b,
1171// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1172// is equivalent to:
1173// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1174// can be lowered to:
1175// VPACKOD_D vr0, vr0, vr1
1177 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1178 unsigned EltBits = VT.getScalarSizeInBits();
1179
1180 if (EltBits > 32 || EltBits == 1)
1181 return SDValue();
1182
1183 SmallVector<int, 8> NewMask;
1184 if (widenShuffleMaskElts(Mask, NewMask)) {
1185 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1186 : MVT::getIntegerVT(EltBits * 2);
1187 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1188 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1189 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1190 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1191 return DAG.getBitcast(
1192 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1193 }
1194 }
1195
1196 return SDValue();
1197}
1198
1199/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1200/// instruction.
1201// The function matches elements from one of the input vectors shuffled to the
1202// left or right with zeroable elements 'shifted in'. It handles both the
1203// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1204// lane.
1205// Mostly copied from X86.
1206static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1207 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1208 int MaskOffset, const APInt &Zeroable) {
1209 int Size = Mask.size();
1210 unsigned SizeInBits = Size * ScalarSizeInBits;
1211
1212 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1213 for (int i = 0; i < Size; i += Scale)
1214 for (int j = 0; j < Shift; ++j)
1215 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1216 return false;
1217
1218 return true;
1219 };
1220
1221 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1222 int Step = 1) {
1223 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1224 if (!(Mask[i] == -1 || Mask[i] == Low))
1225 return false;
1226 return true;
1227 };
1228
1229 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1230 for (int i = 0; i != Size; i += Scale) {
1231 unsigned Pos = Left ? i + Shift : i;
1232 unsigned Low = Left ? i : i + Shift;
1233 unsigned Len = Scale - Shift;
1234 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1235 return -1;
1236 }
1237
1238 int ShiftEltBits = ScalarSizeInBits * Scale;
1239 bool ByteShift = ShiftEltBits > 64;
1240 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1241 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1242 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1243
1244 // Normalize the scale for byte shifts to still produce an i64 element
1245 // type.
1246 Scale = ByteShift ? Scale / 2 : Scale;
1247
1248 // We need to round trip through the appropriate type for the shift.
1249 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1250 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1251 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1252 return (int)ShiftAmt;
1253 };
1254
1255 unsigned MaxWidth = 128;
1256 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1257 for (int Shift = 1; Shift != Scale; ++Shift)
1258 for (bool Left : {true, false})
1259 if (CheckZeros(Shift, Scale, Left)) {
1260 int ShiftAmt = MatchShift(Shift, Scale, Left);
1261 if (0 < ShiftAmt)
1262 return ShiftAmt;
1263 }
1264
1265 // no match
1266 return -1;
1267}
1268
1269/// Lower VECTOR_SHUFFLE as shift (if possible).
1270///
1271/// For example:
1272/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1273/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1274/// is lowered to:
1275/// (VBSLL_V $v0, $v0, 4)
1276///
1277/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1278/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1279/// is lowered to:
1280/// (VSLLI_D $v0, $v0, 32)
1282 MVT VT, SDValue V1, SDValue V2,
1283 SelectionDAG &DAG,
1284 const LoongArchSubtarget &Subtarget,
1285 const APInt &Zeroable) {
1286 int Size = Mask.size();
1287 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1288
1289 MVT ShiftVT;
1290 SDValue V = V1;
1291 unsigned Opcode;
1292
1293 // Try to match shuffle against V1 shift.
1294 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1295 Mask, 0, Zeroable);
1296
1297 // If V1 failed, try to match shuffle against V2 shift.
1298 if (ShiftAmt < 0) {
1299 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1300 Mask, Size, Zeroable);
1301 V = V2;
1302 }
1303
1304 if (ShiftAmt < 0)
1305 return SDValue();
1306
1307 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1308 "Illegal integer vector type");
1309 V = DAG.getBitcast(ShiftVT, V);
1310 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1311 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1312 return DAG.getBitcast(VT, V);
1313}
1314
1315/// Determine whether a range fits a regular pattern of values.
1316/// This function accounts for the possibility of jumping over the End iterator.
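/// For example (illustrative): in the mask <0, 4, 1, 5>, the even entries fit
/// (Begin, CheckStride = 2, End, ExpectedIndex = 0, Stride = 1) and the odd
/// entries fit the same pattern starting at Begin + 1 with ExpectedIndex = 4,
/// which is how the VILVL lowering below recognizes this mask.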
1317template <typename ValType>
1318static bool
1320 unsigned CheckStride,
1322 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1323 auto &I = Begin;
1324
1325 while (I != End) {
1326 if (*I != -1 && *I != ExpectedIndex)
1327 return false;
1328 ExpectedIndex += ExpectedIndexStride;
1329
1330 // Incrementing past End is undefined behaviour so we must increment one
1331 // step at a time and check for End at each step.
1332 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1333 ; // Empty loop body.
1334 }
1335 return true;
1336}
1337
1338/// Compute whether each element of a shuffle is zeroable.
1339///
1340/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1342 SDValue V2, APInt &KnownUndef,
1343 APInt &KnownZero) {
1344 int Size = Mask.size();
1345 KnownUndef = KnownZero = APInt::getZero(Size);
1346
1347 V1 = peekThroughBitcasts(V1);
1348 V2 = peekThroughBitcasts(V2);
1349
1350 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1351 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1352
1353 int VectorSizeInBits = V1.getValueSizeInBits();
1354 int ScalarSizeInBits = VectorSizeInBits / Size;
1355 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1356 (void)ScalarSizeInBits;
1357
1358 for (int i = 0; i < Size; ++i) {
1359 int M = Mask[i];
1360 if (M < 0) {
1361 KnownUndef.setBit(i);
1362 continue;
1363 }
1364 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1365 KnownZero.setBit(i);
1366 continue;
1367 }
1368 }
1369}
1370
1371/// Test whether a shuffle mask is equivalent within each sub-lane.
1372///
1373/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1374/// non-trivial to compute in the face of undef lanes. The representation is
1375/// suitable for use with existing 128-bit shuffles as entries from the second
1376/// vector have been remapped to [LaneSize, 2*LaneSize).
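/// For example (illustrative): for a v8i32 shuffle (two 128-bit lanes of four
/// elements), the mask <0, 9, 2, 11, 4, 13, 6, 15> repeats per lane and yields
/// the RepeatedMask <0, 5, 2, 7>, with second-vector entries remapped to
/// [4, 8).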
1377static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1378 ArrayRef<int> Mask,
1379 SmallVectorImpl<int> &RepeatedMask) {
1380 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1381 RepeatedMask.assign(LaneSize, -1);
1382 int Size = Mask.size();
1383 for (int i = 0; i < Size; ++i) {
1384 assert(Mask[i] == -1 || Mask[i] >= 0);
1385 if (Mask[i] < 0)
1386 continue;
1387 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1388 // This entry crosses lanes, so there is no way to model this shuffle.
1389 return false;
1390
1391 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1392 // Adjust second vector indices to start at LaneSize instead of Size.
1393 int LocalM =
1394 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1395 if (RepeatedMask[i % LaneSize] < 0)
1396 // This is the first non-undef entry in this slot of a 128-bit lane.
1397 RepeatedMask[i % LaneSize] = LocalM;
1398 else if (RepeatedMask[i % LaneSize] != LocalM)
1399 // Found a mismatch with the repeated mask.
1400 return false;
1401 }
1402 return true;
1403}
1404
1405/// Attempts to match vector shuffle as byte rotation.
1407 ArrayRef<int> Mask) {
1408
1409 SDValue Lo, Hi;
1410 SmallVector<int, 16> RepeatedMask;
1411
1412 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1413 return -1;
1414
1415 int NumElts = RepeatedMask.size();
1416 int Rotation = 0;
1417 int Scale = 16 / NumElts;
1418
1419 for (int i = 0; i < NumElts; ++i) {
1420 int M = RepeatedMask[i];
1421 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1422 "Unexpected mask index.");
1423 if (M < 0)
1424 continue;
1425
1426 // Determine where a rotated vector would have started.
1427 int StartIdx = i - (M % NumElts);
1428 if (StartIdx == 0)
1429 return -1;
1430
1431 // If we found the tail of a vector the rotation must be the missing
1432 // front. If we found the head of a vector, it must be how much of the
1433 // head.
1434 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1435
1436 if (Rotation == 0)
1437 Rotation = CandidateRotation;
1438 else if (Rotation != CandidateRotation)
1439 return -1;
1440
1441 // Compute which value this mask is pointing at.
1442 SDValue MaskV = M < NumElts ? V1 : V2;
1443
1444 // Compute which of the two target values this index should be assigned
1445 // to. This reflects whether the high elements are remaining or the low
1446 // elements are remaining.
1447 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1448
1449 // Either set up this value if we've not encountered it before, or check
1450 // that it remains consistent.
1451 if (!TargetV)
1452 TargetV = MaskV;
1453 else if (TargetV != MaskV)
1454 return -1;
1455 }
1456
1457 // Check that we successfully analyzed the mask, and normalize the results.
1458 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1459 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1460 if (!Lo)
1461 Lo = Hi;
1462 else if (!Hi)
1463 Hi = Lo;
1464
1465 V1 = Lo;
1466 V2 = Hi;
1467
1468 return Rotation * Scale;
1469}
1470
1471/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1472///
1473/// For example:
1474/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1475/// <2 x i32> <i32 3, i32 0>
1476/// is lowered to:
1477/// (VBSRL_V $v1, $v1, 8)
1478/// (VBSLL_V $v0, $v0, 8)
1479/// (VOR_V $v0, $V0, $v1)
1480static SDValue
1482 SDValue V1, SDValue V2, SelectionDAG &DAG,
1483 const LoongArchSubtarget &Subtarget) {
1484
1485 SDValue Lo = V1, Hi = V2;
1486 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1487 if (ByteRotation <= 0)
1488 return SDValue();
1489
1490 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1491 Lo = DAG.getBitcast(ByteVT, Lo);
1492 Hi = DAG.getBitcast(ByteVT, Hi);
1493
1494 int LoByteShift = 16 - ByteRotation;
1495 int HiByteShift = ByteRotation;
1496 MVT GRLenVT = Subtarget.getGRLenVT();
1497
1498 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1499 DAG.getConstant(LoByteShift, DL, GRLenVT));
1500 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1501 DAG.getConstant(HiByteShift, DL, GRLenVT));
1502 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1503}
1504
1505/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1506///
1507/// For example:
1508/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1509/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1510/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1511/// is lowered to:
1512/// (VREPLI $v1, 0)
1513/// (VILVL $v0, $v1, $v0)
1515 ArrayRef<int> Mask, MVT VT,
1516 SDValue V1, SDValue V2,
1517 SelectionDAG &DAG,
1518 const APInt &Zeroable) {
1519 int Bits = VT.getSizeInBits();
1520 int EltBits = VT.getScalarSizeInBits();
1521 int NumElements = VT.getVectorNumElements();
1522
1523 if (Zeroable.isAllOnes())
1524 return DAG.getConstant(0, DL, VT);
1525
1526 // Define a helper function to check a particular ext-scale and lower to it if
1527 // valid.
1528 auto Lower = [&](int Scale) -> SDValue {
1529 SDValue InputV;
1530 bool AnyExt = true;
1531 int Offset = 0;
1532 for (int i = 0; i < NumElements; i++) {
1533 int M = Mask[i];
1534 if (M < 0)
1535 continue;
1536 if (i % Scale != 0) {
1537 // Each of the extended elements needs to be zeroable.
1538 if (!Zeroable[i])
1539 return SDValue();
1540
1541 AnyExt = false;
1542 continue;
1543 }
1544
1545 // Each of the base elements needs to be consecutive indices into the
1546 // same input vector.
1547 SDValue V = M < NumElements ? V1 : V2;
1548 M = M % NumElements;
1549 if (!InputV) {
1550 InputV = V;
1551 Offset = M - (i / Scale);
1552
1553 // These offsets can't be handled.
1554 if (Offset % (NumElements / Scale))
1555 return SDValue();
1556 } else if (InputV != V)
1557 return SDValue();
1558
1559 if (M != (Offset + (i / Scale)))
1560 return SDValue(); // Non-consecutive strided elements.
1561 }
1562
1563 // If we fail to find an input, we have a zero-shuffle which should always
1564 // have already been handled.
1565 if (!InputV)
1566 return SDValue();
1567
1568 do {
1569 unsigned VilVLoHi = LoongArchISD::VILVL;
1570 if (Offset >= (NumElements / 2)) {
1571 VilVLoHi = LoongArchISD::VILVH;
1572 Offset -= (NumElements / 2);
1573 }
1574
1575 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1576 SDValue Ext =
1577 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1578 InputV = DAG.getBitcast(InputVT, InputV);
1579 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1580 Scale /= 2;
1581 EltBits *= 2;
1582 NumElements /= 2;
1583 } while (Scale > 1);
1584 return DAG.getBitcast(VT, InputV);
1585 };
1586
1587 // Each iteration, try extending the elements half as much, but into twice as
1588 // many elements.
1589 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1590 NumExtElements *= 2) {
1591 if (SDValue V = Lower(NumElements / NumExtElements))
1592 return V;
1593 }
1594 return SDValue();
1595}
1596
1597/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1598///
1599/// VREPLVEI performs vector broadcast based on an element specified by an
1600/// integer immediate, with its mask being similar to:
1601/// <x, x, x, ...>
1602/// where x is any valid index.
1603///
1604/// When undef's appear in the mask they are treated as if they were whatever
1605/// value is necessary in order to fit the above form.
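/// For example (illustrative, not from the original source):
///   %1 = shufflevector <4 x i32> %a, <4 x i32> poison,
///                      <4 x i32> <i32 2, i32 2, i32 2, i32 2>
/// broadcasts element 2 and can be lowered to roughly (VREPLVEI_W $v0, $v0, 2).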
1606static SDValue
1608 SDValue V1, SelectionDAG &DAG,
1609 const LoongArchSubtarget &Subtarget) {
1610 int SplatIndex = -1;
1611 for (const auto &M : Mask) {
1612 if (M != -1) {
1613 SplatIndex = M;
1614 break;
1615 }
1616 }
1617
1618 if (SplatIndex == -1)
1619 return DAG.getUNDEF(VT);
1620
1621 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1622 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1623 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1624 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1625 }
1626
1627 return SDValue();
1628}
1629
1630/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1631///
1632/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1633/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1634///
1635/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1636/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1637/// When undef's appear they are treated as if they were whatever value is
1638/// necessary in order to fit the above forms.
1639///
1640/// For example:
1641/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1642/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1643/// i32 7, i32 6, i32 5, i32 4>
1644/// is lowered to:
1645/// (VSHUF4I_H $v0, $v1, 27)
1646/// where the 27 comes from:
1647/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1648static SDValue
1650 SDValue V1, SDValue V2, SelectionDAG &DAG,
1651 const LoongArchSubtarget &Subtarget) {
1652
1653 unsigned SubVecSize = 4;
1654 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1655 SubVecSize = 2;
1656
1657 int SubMask[4] = {-1, -1, -1, -1};
1658 for (unsigned i = 0; i < SubVecSize; ++i) {
1659 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1660 int M = Mask[j];
1661
1662 // Convert from vector index to 4-element subvector index
1663 // If an index refers to an element outside of the subvector then give up
1664 if (M != -1) {
1665 M -= 4 * (j / SubVecSize);
1666 if (M < 0 || M >= 4)
1667 return SDValue();
1668 }
1669
1670 // If the mask has an undef, replace it with the current index.
1671 // Note that it might still be undef if the current index is also undef
1672 if (SubMask[i] == -1)
1673 SubMask[i] = M;
1674 // Check that non-undef values are the same as in the mask. If they
1675 // aren't then give up
1676 else if (M != -1 && M != SubMask[i])
1677 return SDValue();
1678 }
1679 }
1680
1681 // Calculate the immediate. Replace any remaining undefs with zero
1682 int Imm = 0;
1683 for (int i = SubVecSize - 1; i >= 0; --i) {
1684 int M = SubMask[i];
1685
1686 if (M == -1)
1687 M = 0;
1688
1689 Imm <<= 2;
1690 Imm |= M & 0x3;
1691 }
1692
1693 MVT GRLenVT = Subtarget.getGRLenVT();
1694
1695 // Return vshuf4i.d
1696 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1697 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1698 DAG.getConstant(Imm, DL, GRLenVT));
1699
1700 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1701 DAG.getConstant(Imm, DL, GRLenVT));
1702}
1703
1704/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1705///
1706/// VPACKEV interleaves the even elements from each vector.
1707///
1708/// It is possible to lower into VPACKEV when the mask consists of two of the
1709/// following forms interleaved:
1710/// <0, 2, 4, ...>
1711/// <n, n+2, n+4, ...>
1712/// where n is the number of elements in the vector.
1713/// For example:
1714/// <0, 0, 2, 2, 4, 4, ...>
1715/// <0, n, 2, n+2, 4, n+4, ...>
1716///
1717/// When undef's appear in the mask they are treated as if they were whatever
1718/// value is necessary in order to fit the above forms.
1720 MVT VT, SDValue V1, SDValue V2,
1721 SelectionDAG &DAG) {
1722
1723 const auto &Begin = Mask.begin();
1724 const auto &End = Mask.end();
1725 SDValue OriV1 = V1, OriV2 = V2;
1726
1727 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1728 V1 = OriV1;
1729 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1730 V1 = OriV2;
1731 else
1732 return SDValue();
1733
1734 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1735 V2 = OriV1;
1736 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1737 V2 = OriV2;
1738 else
1739 return SDValue();
1740
1741 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1742}
1743
1744/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1745///
1746/// VPACKOD interleaves the odd elements from each vector.
1747///
1748/// It is possible to lower into VPACKOD when the mask consists of two of the
1749/// following forms interleaved:
1750/// <1, 3, 5, ...>
1751/// <n+1, n+3, n+5, ...>
1752/// where n is the number of elements in the vector.
1753/// For example:
1754/// <1, 1, 3, 3, 5, 5, ...>
1755/// <1, n+1, 3, n+3, 5, n+5, ...>
1756///
1757/// When undef's appear in the mask they are treated as if they were whatever
1758/// value is necessary in order to fit the above forms.
1760 MVT VT, SDValue V1, SDValue V2,
1761 SelectionDAG &DAG) {
1762
1763 const auto &Begin = Mask.begin();
1764 const auto &End = Mask.end();
1765 SDValue OriV1 = V1, OriV2 = V2;
1766
1767 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1768 V1 = OriV1;
1769 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1770 V1 = OriV2;
1771 else
1772 return SDValue();
1773
1774 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1775 V2 = OriV1;
1776 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1777 V2 = OriV2;
1778 else
1779 return SDValue();
1780
1781 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1782}
1783
1784/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1785///
1786/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1787/// of each vector.
1788///
1789/// It is possible to lower into VILVH when the mask consists of two of the
1790/// following forms interleaved:
1791/// <x, x+1, x+2, ...>
1792/// <n+x, n+x+1, n+x+2, ...>
1793/// where n is the number of elements in the vector and x is half n.
1794/// For example:
1795/// <x, x, x+1, x+1, x+2, x+2, ...>
1796/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1797///
1798/// When undef's appear in the mask they are treated as if they were whatever
1799/// value is necessary in order to fit the above forms.
1801 MVT VT, SDValue V1, SDValue V2,
1802 SelectionDAG &DAG) {
1803
1804 const auto &Begin = Mask.begin();
1805 const auto &End = Mask.end();
1806 unsigned HalfSize = Mask.size() / 2;
1807 SDValue OriV1 = V1, OriV2 = V2;
1808
1809 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1810 V1 = OriV1;
1811 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1812 V1 = OriV2;
1813 else
1814 return SDValue();
1815
1816 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1817 V2 = OriV1;
1818 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1819 1))
1820 V2 = OriV2;
1821 else
1822 return SDValue();
1823
1824 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1825}
1826
1827/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1828///
1829/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1830/// of each vector.
1831///
1832/// It is possible to lower into VILVL when the mask consists of two of the
1833/// following forms interleaved:
1834/// <0, 1, 2, ...>
1835/// <n, n+1, n+2, ...>
1836/// where n is the number of elements in the vector.
1837/// For example:
1838/// <0, 0, 1, 1, 2, 2, ...>
1839/// <0, n, 1, n+1, 2, n+2, ...>
1840///
1841/// When undef's appear in the mask they are treated as if they were whatever
1842/// value is necessary in order to fit the above forms.
1844 MVT VT, SDValue V1, SDValue V2,
1845 SelectionDAG &DAG) {
1846
1847 const auto &Begin = Mask.begin();
1848 const auto &End = Mask.end();
1849 SDValue OriV1 = V1, OriV2 = V2;
1850
1851 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1852 V1 = OriV1;
1853 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1854 V1 = OriV2;
1855 else
1856 return SDValue();
1857
1858 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1859 V2 = OriV1;
1860 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1861 V2 = OriV2;
1862 else
1863 return SDValue();
1864
1865 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1866}
1867
1868/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1869///
1870/// VPICKEV copies the even elements of each vector into the result vector.
1871///
1872/// It is possible to lower into VPICKEV when the mask consists of two of the
1873/// following forms concatenated:
1874/// <0, 2, 4, ...>
1875/// <n, n+2, n+4, ...>
1876/// where n is the number of elements in the vector.
1877/// For example:
1878/// <0, 2, 4, ..., 0, 2, 4, ...>
1879/// <0, 2, 4, ..., n, n+2, n+4, ...>
1880///
1881/// When undef's appear in the mask they are treated as if they were whatever
1882/// value is necessary in order to fit the above forms.
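///
/// Illustrative example (editorial addition): for a v8i16 shuffle, the mask
///   <0, 2, 4, 6, 8, 10, 12, 14>
/// takes the even elements of the first input followed by the even elements
/// of the second input and is matched here, typically selecting vpickev.h.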
1883 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1884 MVT VT, SDValue V1, SDValue V2,
1885 SelectionDAG &DAG) {
1886
1887 const auto &Begin = Mask.begin();
1888 const auto &Mid = Mask.begin() + Mask.size() / 2;
1889 const auto &End = Mask.end();
1890 SDValue OriV1 = V1, OriV2 = V2;
1891
1892 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1893 V1 = OriV1;
1894 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1895 V1 = OriV2;
1896 else
1897 return SDValue();
1898
1899 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1900 V2 = OriV1;
1901 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1902 V2 = OriV2;
1903
1904 else
1905 return SDValue();
1906
1907 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1908}
1909
1910/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1911///
1912/// VPICKOD copies the odd elements of each vector into the result vector.
1913///
1914/// It is possible to lower into VPICKOD when the mask consists of two of the
1915/// following forms concatenated:
1916/// <1, 3, 5, ...>
1917/// <n+1, n+3, n+5, ...>
1918/// where n is the number of elements in the vector.
1919/// For example:
1920/// <1, 3, 5, ..., 1, 3, 5, ...>
1921/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1922///
1923/// When undef's appear in the mask they are treated as if they were whatever
1924/// value is necessary in order to fit the above forms.
1925 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1926 MVT VT, SDValue V1, SDValue V2,
1927 SelectionDAG &DAG) {
1928
1929 const auto &Begin = Mask.begin();
1930 const auto &Mid = Mask.begin() + Mask.size() / 2;
1931 const auto &End = Mask.end();
1932 SDValue OriV1 = V1, OriV2 = V2;
1933
1934 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1935 V1 = OriV1;
1936 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1937 V1 = OriV2;
1938 else
1939 return SDValue();
1940
1941 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1942 V2 = OriV1;
1943 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1944 V2 = OriV2;
1945 else
1946 return SDValue();
1947
1948 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1949}
1950
1951/// Lower VECTOR_SHUFFLE into VSHUF.
1952///
1953/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1954/// adding it as an operand to the resulting VSHUF.
1955 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1956 MVT VT, SDValue V1, SDValue V2,
1957 SelectionDAG &DAG,
1958 const LoongArchSubtarget &Subtarget) {
1959
1960 SmallVector<SDValue, 16> Ops;
1961 for (auto M : Mask)
1962 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
1963
1964 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1965 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1966
1967 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1968 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1969 // VSHUF concatenates the vectors in a bitwise fashion:
1970 // <0b00, 0b01> + <0b10, 0b11> ->
1971 // 0b0100 + 0b1110 -> 0b01001110
1972 // <0b10, 0b11, 0b00, 0b01>
1973 // We must therefore swap the operands to get the correct result.
1974 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1975}
1976
1977/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1978///
1979/// This routine breaks down the specific type of 128-bit shuffle and
1980/// dispatches to the lowering routines accordingly.
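///
/// Illustrative end-to-end example (editorial addition): the IR
///   %r = shufflevector <4 x i32> %a, <4 x i32> %b,
///                      <4 x i32> <i32 0, i32 4, i32 1, i32 5>
/// interleaves the low halves of %a and %b, so this dispatcher matches the
/// VILVL pattern below and a single vilvl.w is emitted.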
1981 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1982 SDValue V1, SDValue V2, SelectionDAG &DAG,
1983 const LoongArchSubtarget &Subtarget) {
1984 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1985 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1986 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1987 "Vector type is unsupported for lsx!");
1988 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1989 "Two operands have different types!");
1990 assert(VT.getVectorNumElements() == Mask.size() &&
1991 "Unexpected mask size for shuffle!");
1992 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1993
1994 APInt KnownUndef, KnownZero;
1995 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1996 APInt Zeroable = KnownUndef | KnownZero;
1997
1998 SDValue Result;
1999 // TODO: Add more comparison patterns.
2000 if (V2.isUndef()) {
2001 if ((Result =
2002 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2003 return Result;
2004 if ((Result =
2005 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2006 return Result;
2007
2008 // TODO: The commented-out assignment below may be enabled in the future to
2009 // better match the pattern for instruction selection.
2010 /* V2 = V1; */
2011 }
2012
2013 // It is recommended not to change the pattern comparison order for better
2014 // performance.
2015 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2016 return Result;
2017 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2018 return Result;
2019 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2020 return Result;
2021 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2022 return Result;
2023 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2024 return Result;
2025 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2026 return Result;
2027 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2028 (Result =
2029 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2030 return Result;
2031 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2032 Zeroable)))
2033 return Result;
2034 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2035 Zeroable)))
2036 return Result;
2037 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2038 Subtarget)))
2039 return Result;
2040 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2041 return NewShuffle;
2042 if ((Result =
2043 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2044 return Result;
2045 return SDValue();
2046}
2047
2048/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2049///
2050 /// It is an XVREPLVEI when the mask is:
2051 ///   <x, x, x, ..., x+n, x+n, x+n, ...>
2052 ///   where x appears n times and n is half the length of the vector.
2053///
2054/// When undef's appear in the mask they are treated as if they were whatever
2055/// value is necessary in order to fit the above form.
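///
/// Illustrative example (editorial addition): for a single-operand v8i32
/// shuffle, the mask
///   <1, 1, 1, 1, 5, 5, 5, 5>
/// splats element 1 within each 128-bit half (SplatIndex = 1 < HalfSize) and
/// is matched here, typically selecting xvrepl128vei.w with immediate 1.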
2056static SDValue
2057 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2058 SDValue V1, SelectionDAG &DAG,
2059 const LoongArchSubtarget &Subtarget) {
2060 int SplatIndex = -1;
2061 for (const auto &M : Mask) {
2062 if (M != -1) {
2063 SplatIndex = M;
2064 break;
2065 }
2066 }
2067
2068 if (SplatIndex == -1)
2069 return DAG.getUNDEF(VT);
2070
2071 const auto &Begin = Mask.begin();
2072 const auto &End = Mask.end();
2073 int HalfSize = Mask.size() / 2;
2074
2075 if (SplatIndex >= HalfSize)
2076 return SDValue();
2077
2078 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2079 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2080 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2081 0)) {
2082 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2083 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2084 }
2085
2086 return SDValue();
2087}
2088
2089/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2090static SDValue
2091 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2092 SDValue V1, SDValue V2, SelectionDAG &DAG,
2093 const LoongArchSubtarget &Subtarget) {
2094 // When the mask size is less than or equal to 4, lower-cost instructions
2095 // may be used.
2096 if (Mask.size() <= 4)
2097 return SDValue();
2098 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2099}
2100
2101/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2102static SDValue
2103 lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2104 SDValue V1, SelectionDAG &DAG,
2105 const LoongArchSubtarget &Subtarget) {
2106 // Only consider XVPERMI_D.
2107 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2108 return SDValue();
2109
2110 unsigned MaskImm = 0;
2111 for (unsigned i = 0; i < Mask.size(); ++i) {
2112 if (Mask[i] == -1)
2113 continue;
2114 MaskImm |= Mask[i] << (i * 2);
2115 }
2116
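  // Illustrative example (editorial addition): Mask <3, 1, 2, 0> gives
  // MaskImm = 3 | (1 << 2) | (2 << 4) | (0 << 6) = 0b00100111 = 39, i.e. an
  // xvpermi.d with immediate 39.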
2117 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2118 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2119}
2120
2121/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2122 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2123 MVT VT, SDValue V1, SelectionDAG &DAG,
2124 const LoongArchSubtarget &Subtarget) {
2125 // LoongArch LASX only has XVPERM_W.
2126 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2127 return SDValue();
2128
2129 unsigned NumElts = VT.getVectorNumElements();
2130 unsigned HalfSize = NumElts / 2;
2131 bool FrontLo = true, FrontHi = true;
2132 bool BackLo = true, BackHi = true;
2133
2134 auto inRange = [](int val, int low, int high) {
2135 return (val == -1) || (val >= low && val < high);
2136 };
2137
2138 for (unsigned i = 0; i < HalfSize; ++i) {
2139 int Fronti = Mask[i];
2140 int Backi = Mask[i + HalfSize];
2141
2142 FrontLo &= inRange(Fronti, 0, HalfSize);
2143 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2144 BackLo &= inRange(Backi, 0, HalfSize);
2145 BackHi &= inRange(Backi, HalfSize, NumElts);
2146 }
2147
2148 // If both the lower and upper 128-bit parts access only one half of the
2149 // vector (either lower or upper), avoid using xvperm.w. The latency of
2150 // xvperm.w (3 cycles) is higher than that of xvshuf (1) plus xvori (1).
2151 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2152 return SDValue();
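  // Illustrative examples (editorial addition): for v8i32, the mask
  // <0, 1, 2, 3, 0, 1, 2, 3> keeps both 128-bit halves within the low half of
  // the source and is rejected above in favour of cheaper lowerings, while a
  // genuinely cross-half mask such as <7, 2, 6, 1, 5, 0, 4, 3> falls through
  // to the xvperm.w path below.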
2153
2154 SmallVector<SDValue, 8> Masks;
2155 MVT GRLenVT = Subtarget.getGRLenVT();
2156 for (unsigned i = 0; i < NumElts; ++i)
2157 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2158 : DAG.getConstant(Mask[i], DL, GRLenVT));
2159 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2160
2161 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2162}
2163
2164/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2165 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2166 MVT VT, SDValue V1, SDValue V2,
2167 SelectionDAG &DAG) {
2168 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2169}
2170
2171/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2172 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2173 MVT VT, SDValue V1, SDValue V2,
2174 SelectionDAG &DAG) {
2175 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2176}
2177
2178/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2179 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2180 MVT VT, SDValue V1, SDValue V2,
2181 SelectionDAG &DAG) {
2182
2183 const auto &Begin = Mask.begin();
2184 const auto &End = Mask.end();
2185 unsigned HalfSize = Mask.size() / 2;
2186 unsigned LeftSize = HalfSize / 2;
2187 SDValue OriV1 = V1, OriV2 = V2;
2188
2189 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2190 1) &&
2191 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2192 V1 = OriV1;
2193 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2194 Mask.size() + HalfSize - LeftSize, 1) &&
2195 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2196 Mask.size() + HalfSize + LeftSize, 1))
2197 V1 = OriV2;
2198 else
2199 return SDValue();
2200
2201 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2202 1) &&
2203 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2204 1))
2205 V2 = OriV1;
2206 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2207 Mask.size() + HalfSize - LeftSize, 1) &&
2208 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2209 Mask.size() + HalfSize + LeftSize, 1))
2210 V2 = OriV2;
2211 else
2212 return SDValue();
2213
2214 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2215}
2216
2217/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2218 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2219 MVT VT, SDValue V1, SDValue V2,
2220 SelectionDAG &DAG) {
2221
2222 const auto &Begin = Mask.begin();
2223 const auto &End = Mask.end();
2224 unsigned HalfSize = Mask.size() / 2;
2225 SDValue OriV1 = V1, OriV2 = V2;
2226
2227 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2228 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2229 V1 = OriV1;
2230 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2231 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2232 Mask.size() + HalfSize, 1))
2233 V1 = OriV2;
2234 else
2235 return SDValue();
2236
2237 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2238 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2239 V2 = OriV1;
2240 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2241 1) &&
2242 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2243 Mask.size() + HalfSize, 1))
2244 V2 = OriV2;
2245 else
2246 return SDValue();
2247
2248 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2249}
2250
2251/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2252 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2253 MVT VT, SDValue V1, SDValue V2,
2254 SelectionDAG &DAG) {
2255
2256 const auto &Begin = Mask.begin();
2257 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2258 const auto &Mid = Mask.begin() + Mask.size() / 2;
2259 const auto &RightMid = Mask.end() - Mask.size() / 4;
2260 const auto &End = Mask.end();
2261 unsigned HalfSize = Mask.size() / 2;
2262 SDValue OriV1 = V1, OriV2 = V2;
2263
2264 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2265 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2266 V1 = OriV1;
2267 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2268 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2269 V1 = OriV2;
2270 else
2271 return SDValue();
2272
2273 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2274 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2275 V2 = OriV1;
2276 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2277 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2278 V2 = OriV2;
2279
2280 else
2281 return SDValue();
2282
2283 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2284}
2285
2286/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2287 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2288 MVT VT, SDValue V1, SDValue V2,
2289 SelectionDAG &DAG) {
2290
2291 const auto &Begin = Mask.begin();
2292 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2293 const auto &Mid = Mask.begin() + Mask.size() / 2;
2294 const auto &RightMid = Mask.end() - Mask.size() / 4;
2295 const auto &End = Mask.end();
2296 unsigned HalfSize = Mask.size() / 2;
2297 SDValue OriV1 = V1, OriV2 = V2;
2298
2299 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2300 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2301 V1 = OriV1;
2302 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2303 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2304 2))
2305 V1 = OriV2;
2306 else
2307 return SDValue();
2308
2309 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2310 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2311 V2 = OriV1;
2312 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2313 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2314 2))
2315 V2 = OriV2;
2316 else
2317 return SDValue();
2318
2319 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2320}
2321
2322/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2323 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2324 MVT VT, SDValue V1, SDValue V2,
2325 SelectionDAG &DAG) {
2326
2327 int MaskSize = Mask.size();
2328 int HalfSize = Mask.size() / 2;
2329 const auto &Begin = Mask.begin();
2330 const auto &Mid = Mask.begin() + HalfSize;
2331 const auto &End = Mask.end();
2332
2333 // VECTOR_SHUFFLE concatenates the vectors:
2334 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2335 // shuffling ->
2336 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2337 //
2338 // XVSHUF concatenates the vectors:
2339 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2340 // shuffling ->
2341 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
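  // Illustrative example (editorial addition): for v8i32 with the original
  // mask <0, 8, 1, 9, 4, 12, 5, 13>, both halves rewrite to the
  // operand-relative indices <0, 4, 1, 5>, which is what xvshuf expects (with
  // V1/V2 swapped in the node built below).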
2342 SmallVector<SDValue, 8> MaskAlloc;
2343 for (auto it = Begin; it < Mid; it++) {
2344 if (*it < 0) // UNDEF
2345 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2346 else if ((*it >= 0 && *it < HalfSize) ||
2347 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2348 int M = *it < HalfSize ? *it : *it - HalfSize;
2349 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2350 } else
2351 return SDValue();
2352 }
2353 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2354
2355 for (auto it = Mid; it < End; it++) {
2356 if (*it < 0) // UNDEF
2357 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2358 else if ((*it >= HalfSize && *it < MaskSize) ||
2359 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2360 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2361 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2362 } else
2363 return SDValue();
2364 }
2365 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2366
2367 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2368 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2369 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2370}
2371
2372/// Shuffle vectors by lane to generate more optimized instructions.
2373/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2374///
2375/// Therefore, except for the following four cases, other cases are regarded
2376/// as cross-lane shuffles, where optimization is relatively limited.
2377///
2378 /// - Shuffle high, low lanes of two input vectors
2379 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2380 /// - Shuffle low, high lanes of two input vectors
2381 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2382 /// - Shuffle low, low lanes of two input vectors
2383 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2384 /// - Shuffle high, high lanes of two input vectors
2385/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2386///
2387/// The first case is the closest to LoongArch instructions and the other
2388/// cases need to be converted to it for processing.
2389///
2390/// This function will return true for the last three cases above and will
2391/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2392/// cross-lane shuffle cases.
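///
/// Illustrative example (editorial addition): for v4i64 with mask
/// <2, 3, 6, 7>, both halves of the mask are of the low-lane type, so this
/// function rewrites V1 and V2 with xvpermi.d (immediate 0b11101110) and the
/// mask becomes <0, 1, 6, 7>, which then matches the first case above.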
2393 static bool canonicalizeShuffleVectorByLane(
2394 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2395 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2396
2397 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2398
2399 int MaskSize = Mask.size();
2400 int HalfSize = Mask.size() / 2;
2401 MVT GRLenVT = Subtarget.getGRLenVT();
2402
2403 HalfMaskType preMask = None, postMask = None;
2404
2405 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2406 return M < 0 || (M >= 0 && M < HalfSize) ||
2407 (M >= MaskSize && M < MaskSize + HalfSize);
2408 }))
2409 preMask = HighLaneTy;
2410 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2411 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2412 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2413 }))
2414 preMask = LowLaneTy;
2415
2416 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2417 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2418 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2419 }))
2420 postMask = LowLaneTy;
2421 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2422 return M < 0 || (M >= 0 && M < HalfSize) ||
2423 (M >= MaskSize && M < MaskSize + HalfSize);
2424 }))
2425 postMask = HighLaneTy;
2426
2427 // The pre-half of the mask is of the high-lane type and the post-half is of
2428 // the low-lane type, which is closest to the LoongArch instructions.
2429 //
2430 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2431 // to the lower 128 bits of the vector register, and the low lane of the mask
2432 // corresponds to the higher 128 bits of the vector register.
2433 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2434 return false;
2435 }
2436 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2437 V1 = DAG.getBitcast(MVT::v4i64, V1);
2438 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2439 DAG.getConstant(0b01001110, DL, GRLenVT));
2440 V1 = DAG.getBitcast(VT, V1);
2441
2442 if (!V2.isUndef()) {
2443 V2 = DAG.getBitcast(MVT::v4i64, V2);
2444 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2445 DAG.getConstant(0b01001110, DL, GRLenVT));
2446 V2 = DAG.getBitcast(VT, V2);
2447 }
2448
2449 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2450 *it = *it < 0 ? *it : *it - HalfSize;
2451 }
2452 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2453 *it = *it < 0 ? *it : *it + HalfSize;
2454 }
2455 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2456 V1 = DAG.getBitcast(MVT::v4i64, V1);
2457 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2458 DAG.getConstant(0b11101110, DL, GRLenVT));
2459 V1 = DAG.getBitcast(VT, V1);
2460
2461 if (!V2.isUndef()) {
2462 V2 = DAG.getBitcast(MVT::v4i64, V2);
2463 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2464 DAG.getConstant(0b11101110, DL, GRLenVT));
2465 V2 = DAG.getBitcast(VT, V2);
2466 }
2467
2468 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2469 *it = *it < 0 ? *it : *it - HalfSize;
2470 }
2471 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2472 V1 = DAG.getBitcast(MVT::v4i64, V1);
2473 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2474 DAG.getConstant(0b01000100, DL, GRLenVT));
2475 V1 = DAG.getBitcast(VT, V1);
2476
2477 if (!V2.isUndef()) {
2478 V2 = DAG.getBitcast(MVT::v4i64, V2);
2479 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2480 DAG.getConstant(0b01000100, DL, GRLenVT));
2481 V2 = DAG.getBitcast(VT, V2);
2482 }
2483
2484 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2485 *it = *it < 0 ? *it : *it + HalfSize;
2486 }
2487 } else { // cross-lane
2488 return false;
2489 }
2490
2491 return true;
2492}
2493
2494/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2495/// Only for 256-bit vector.
2496///
2497/// For example:
2498 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2499 ///                    <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2500 /// is lowered to:
2501/// (XVPERMI $xr2, $xr0, 78)
2502/// (XVSHUF $xr1, $xr2, $xr0)
2503/// (XVORI $xr0, $xr1, 0)
2504 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2505 ArrayRef<int> Mask,
2506 MVT VT, SDValue V1,
2507 SDValue V2,
2508 SelectionDAG &DAG) {
2509 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2510 int Size = Mask.size();
2511 int LaneSize = Size / 2;
2512
2513 bool LaneCrossing[2] = {false, false};
2514 for (int i = 0; i < Size; ++i)
2515 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2516 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2517
2518 // Bail out if no element actually crosses the 128-bit lanes.
2519 if (!LaneCrossing[0] && !LaneCrossing[1])
2520 return SDValue();
2521
2522 SmallVector<int> InLaneMask;
2523 InLaneMask.assign(Mask.begin(), Mask.end());
2524 for (int i = 0; i < Size; ++i) {
2525 int &M = InLaneMask[i];
2526 if (M < 0)
2527 continue;
2528 if (((M % Size) / LaneSize) != (i / LaneSize))
2529 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2530 }
2531
2532 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2533 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2534 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2535 Flipped = DAG.getBitcast(VT, Flipped);
2536 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2537}
2538
2539/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2540///
2541/// This routine breaks down the specific type of 256-bit shuffle and
2542/// dispatches to the lowering routines accordingly.
2543 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2544 SDValue V1, SDValue V2, SelectionDAG &DAG,
2545 const LoongArchSubtarget &Subtarget) {
2546 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2547 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2548 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2549 "Vector type is unsupported for lasx!");
2550 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2551 "Two operands have different types!");
2552 assert(VT.getVectorNumElements() == Mask.size() &&
2553 "Unexpected mask size for shuffle!");
2554 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2555 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2556
2557 APInt KnownUndef, KnownZero;
2558 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2559 APInt Zeroable = KnownUndef | KnownZero;
2560
2561 SDValue Result;
2562 // TODO: Add more comparison patterns.
2563 if (V2.isUndef()) {
2564 if ((Result =
2565 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2566 return Result;
2567 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2568 Subtarget)))
2569 return Result;
2570 if ((Result =
2571 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2572 return Result;
2573 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2574 return Result;
2575
2576 // TODO: The commented-out assignment below may be enabled in the future to
2577 // better match the pattern for instruction selection.
2578 /* V2 = V1; */
2579 }
2580
2581 // It is recommended not to change the pattern comparison order for better
2582 // performance.
2583 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2584 return Result;
2585 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2586 return Result;
2587 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2588 return Result;
2589 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2590 return Result;
2591 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2592 return Result;
2593 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2594 return Result;
2595 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2596 Zeroable)))
2597 return Result;
2598 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2599 Subtarget)))
2600 return Result;
2601
2602 // Canonicalize non-cross-lane shuffle vectors.
2603 SmallVector<int> NewMask(Mask);
2604 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2605 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2606
2607 // FIXME: Handling the remaining cases earlier can degrade performance
2608 // in some situations. Further analysis is required to enable more
2609 // effective optimizations.
2610 if (V2.isUndef()) {
2611 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2612 V1, V2, DAG)))
2613 return Result;
2614 }
2615
2616 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2617 return NewShuffle;
2618 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2619 return Result;
2620
2621 return SDValue();
2622}
2623
2624SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2625 SelectionDAG &DAG) const {
2626 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2627 ArrayRef<int> OrigMask = SVOp->getMask();
2628 SDValue V1 = Op.getOperand(0);
2629 SDValue V2 = Op.getOperand(1);
2630 MVT VT = Op.getSimpleValueType();
2631 int NumElements = VT.getVectorNumElements();
2632 SDLoc DL(Op);
2633
2634 bool V1IsUndef = V1.isUndef();
2635 bool V2IsUndef = V2.isUndef();
2636 if (V1IsUndef && V2IsUndef)
2637 return DAG.getUNDEF(VT);
2638
2639 // When we create a shuffle node we put the UNDEF node to second operand,
2640 // but in some cases the first operand may be transformed to UNDEF.
2641 // In this case we should just commute the node.
2642 if (V1IsUndef)
2643 return DAG.getCommutedVectorShuffle(*SVOp);
2644
2645 // Check for non-undef masks pointing at an undef vector and make the masks
2646 // undef as well. This makes it easier to match the shuffle based solely on
2647 // the mask.
2648 if (V2IsUndef &&
2649 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2650 SmallVector<int, 8> NewMask(OrigMask);
2651 for (int &M : NewMask)
2652 if (M >= NumElements)
2653 M = -1;
2654 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2655 }
2656
2657 // Check for illegal shuffle mask element index values.
2658 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2659 (void)MaskUpperLimit;
2660 assert(llvm::all_of(OrigMask,
2661 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2662 "Out of bounds shuffle index");
2663
2664 // For each vector width, delegate to a specialized lowering routine.
2665 if (VT.is128BitVector())
2666 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2667
2668 if (VT.is256BitVector())
2669 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2670
2671 return SDValue();
2672}
2673
2674SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2675 SelectionDAG &DAG) const {
2676 // Custom lower to ensure the libcall return is passed in an FPR on hard
2677 // float ABIs.
2678 SDLoc DL(Op);
2679 MakeLibCallOptions CallOptions;
2680 SDValue Op0 = Op.getOperand(0);
2681 SDValue Chain = SDValue();
2682 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2683 SDValue Res;
2684 std::tie(Res, Chain) =
2685 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2686 if (Subtarget.is64Bit())
2687 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2688 return DAG.getBitcast(MVT::i32, Res);
2689}
2690
2691SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2692 SelectionDAG &DAG) const {
2693 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2694 // float ABIs.
2695 SDLoc DL(Op);
2696 MakeLibCallOptions CallOptions;
2697 SDValue Op0 = Op.getOperand(0);
2698 SDValue Chain = SDValue();
2699 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2700 DL, MVT::f32, Op0)
2701 : DAG.getBitcast(MVT::f32, Op0);
2702 SDValue Res;
2703 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2704 CallOptions, DL, Chain);
2705 return Res;
2706}
2707
2708SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2709 SelectionDAG &DAG) const {
2710 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2711 SDLoc DL(Op);
2712 MakeLibCallOptions CallOptions;
2713 RTLIB::Libcall LC =
2714 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2715 SDValue Res =
2716 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2717 if (Subtarget.is64Bit())
2718 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2719 return DAG.getBitcast(MVT::i32, Res);
2720}
2721
2722SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2723 SelectionDAG &DAG) const {
2724 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2725 MVT VT = Op.getSimpleValueType();
2726 SDLoc DL(Op);
2727 Op = DAG.getNode(
2728 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2729 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2730 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2731 DL, MVT::f32, Op)
2732 : DAG.getBitcast(MVT::f32, Op);
2733 if (VT != MVT::f32)
2734 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2735 return Res;
2736}
2737
2738// Lower BUILD_VECTOR as broadcast load (if possible).
2739// For example:
2740// %a = load i8, ptr %ptr
2741// %b = build_vector %a, %a, %a, %a
2742 // is lowered to:
2743// (VLDREPL_B $a0, 0)
2744 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2745 const SDLoc &DL,
2746 SelectionDAG &DAG) {
2747 MVT VT = BVOp->getSimpleValueType(0);
2748 int NumOps = BVOp->getNumOperands();
2749
2750 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2751 "Unsupported vector type for broadcast.");
2752
2753 SDValue IdentitySrc;
2754 bool IsIdentity = true;
2755
2756 for (int i = 0; i != NumOps; i++) {
2757 SDValue Op = BVOp->getOperand(i);
2758 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2759 IsIdentity = false;
2760 break;
2761 }
2762 IdentitySrc = BVOp->getOperand(0);
2763 }
2764
2765 // Make sure that this load is valid and has only one user.
2766 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2767 return SDValue();
2768
2769 auto *LN = cast<LoadSDNode>(IdentitySrc);
2770 auto ExtType = LN->getExtensionType();
2771
2772 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2773 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2774 SDVTList Tys =
2775 LN->isIndexed()
2776 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2777 : DAG.getVTList(VT, MVT::Other);
2778 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2779 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2780 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2781 return BCast;
2782 }
2783 return SDValue();
2784}
2785
2786// Sequentially insert elements from Ops into Vector, from low to high indices.
2787// Note: Ops can have fewer elements than Vector.
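// Illustrative example (editorial addition): with Ops = {a, undef, c} and a
// v4i32 ResTy, this emits (scalar_to_vector a) followed by a single
// insert_vector_elt of c at index 2; undef elements and the untouched tail of
// the vector are left as they are.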
2788 static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
2789 const LoongArchSubtarget &Subtarget, SDValue &Vector,
2790 EVT ResTy) {
2791 assert(Ops.size() <= ResTy.getVectorNumElements());
2792
2793 SDValue Op0 = Ops[0];
2794 if (!Op0.isUndef())
2795 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2796 for (unsigned i = 1; i < Ops.size(); ++i) {
2797 SDValue Opi = Ops[i];
2798 if (Opi.isUndef())
2799 continue;
2800 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2801 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2802 }
2803}
2804
2805// Build a ResTy subvector from Node, taking NumElts elements starting at index
2806// 'first'.
2807 static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
2808 SelectionDAG &DAG, SDLoc DL,
2809 const LoongArchSubtarget &Subtarget,
2810 EVT ResTy, unsigned first) {
2811 unsigned NumElts = ResTy.getVectorNumElements();
2812
2813 assert(first >= 0 &&
2814 first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
2815
2816 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
2817 Node->op_begin() + first + NumElts);
2818 SDValue Vector = DAG.getUNDEF(ResTy);
2819 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
2820 return Vector;
2821}
2822
2823SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2824 SelectionDAG &DAG) const {
2825 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2826 MVT VT = Node->getSimpleValueType(0);
2827 EVT ResTy = Op->getValueType(0);
2828 unsigned NumElts = ResTy.getVectorNumElements();
2829 SDLoc DL(Op);
2830 APInt SplatValue, SplatUndef;
2831 unsigned SplatBitSize;
2832 bool HasAnyUndefs;
2833 bool IsConstant = false;
2834 bool UseSameConstant = true;
2835 SDValue ConstantValue;
2836 bool Is128Vec = ResTy.is128BitVector();
2837 bool Is256Vec = ResTy.is256BitVector();
2838
2839 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2840 (!Subtarget.hasExtLASX() || !Is256Vec))
2841 return SDValue();
2842
2843 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2844 return Result;
2845
2846 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2847 /*MinSplatBits=*/8) &&
2848 SplatBitSize <= 64) {
2849 // We can only cope with 8, 16, 32, or 64-bit elements.
2850 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2851 SplatBitSize != 64)
2852 return SDValue();
2853
2854 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2855 // We can only handle 64-bit elements that are within
2856 // the signed 10-bit range or match vldi patterns on 32-bit targets.
2857 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2858 if (!SplatValue.isSignedIntN(10) &&
2859 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
2860 return SDValue();
2861 if ((Is128Vec && ResTy == MVT::v4i32) ||
2862 (Is256Vec && ResTy == MVT::v8i32))
2863 return Op;
2864 }
2865
2866 EVT ViaVecTy;
2867
2868 switch (SplatBitSize) {
2869 default:
2870 return SDValue();
2871 case 8:
2872 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2873 break;
2874 case 16:
2875 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2876 break;
2877 case 32:
2878 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2879 break;
2880 case 64:
2881 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2882 break;
2883 }
2884
2885 // SelectionDAG::getConstant will promote SplatValue appropriately.
2886 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2887
2888 // Bitcast to the type we originally wanted.
2889 if (ViaVecTy != ResTy)
2890 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2891
2892 return Result;
2893 }
2894
2895 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2896 return Op;
2897
2898 for (unsigned i = 0; i < NumElts; ++i) {
2899 SDValue Opi = Node->getOperand(i);
2900 if (isIntOrFPConstant(Opi)) {
2901 IsConstant = true;
2902 if (!ConstantValue.getNode())
2903 ConstantValue = Opi;
2904 else if (ConstantValue != Opi)
2905 UseSameConstant = false;
2906 }
2907 }
2908
2909 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2910 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2911 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2912 for (unsigned i = 0; i < NumElts; ++i) {
2913 SDValue Opi = Node->getOperand(i);
2914 if (!isIntOrFPConstant(Opi))
2915 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2916 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2917 }
2918 return Result;
2919 }
2920
2921 if (!IsConstant) {
2922 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
2923 // the sub-sequence of the vector and then broadcast the sub-sequence.
2924 //
2925 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
2926 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
2927 // generates worse code in some cases. This could be further optimized
2928 // with more consideration.
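    // Illustrative example (editorial addition): a v8i32 BUILD_VECTOR of the
    // repeated sequence {a, b, a, b, a, b, a, b} fills {a, b} into a 128-bit
    // subvector, reinterprets it as v4i64 and splats element 0 (e.g. with
    // xvreplve0.d) instead of emitting eight element insertions.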
2929 SmallVector<SDValue> Sequence;
2930 BitVector UndefElements;
2931 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
2932 UndefElements.count() == 0) {
2933 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
2934 // because the high part can simply be treated as undef.
2935 SDValue Vector = DAG.getUNDEF(ResTy);
2936 EVT FillTy = Is256Vec
2937 ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
2938 : ResTy;
2939 SDValue FillVec =
2940 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
2941
2942 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
2943
2944 unsigned SeqLen = Sequence.size();
2945 unsigned SplatLen = NumElts / SeqLen;
2946 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
2947 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
2948
2949 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
2950 // vector to v4i64 in order to match the pattern of XVREPLVE0Q.
2951 if (SplatEltTy == MVT::i128)
2952 SplatTy = MVT::v4i64;
2953
2954 SDValue SplatVec;
2955 SDValue SrcVec = DAG.getBitcast(
2956 SplatTy,
2957 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
2958 if (Is256Vec) {
2959 SplatVec =
2960 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
2961 : LoongArchISD::XVREPLVE0,
2962 DL, SplatTy, SrcVec);
2963 } else {
2964 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
2965 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
2966 }
2967
2968 return DAG.getBitcast(ResTy, SplatVec);
2969 }
2970
2971 // Use INSERT_VECTOR_ELT operations rather than expanding to stores, because
2972 // the cost of using memory operations is much higher.
2973 //
2974 // For 256-bit vectors, normally split into two halves and concatenate.
2975 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
2976 // one non-undef element, skip splitting to avoid a worse result.
2977 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
2978 ResTy == MVT::v4f64) {
2979 unsigned NonUndefCount = 0;
2980 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
2981 if (!Node->getOperand(i).isUndef()) {
2982 ++NonUndefCount;
2983 if (NonUndefCount > 1)
2984 break;
2985 }
2986 }
2987 if (NonUndefCount == 1)
2988 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
2989 }
2990
2991 EVT VecTy =
2992 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
2993 SDValue Vector =
2994 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
2995
2996 if (Is128Vec)
2997 return Vector;
2998
2999 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3000 VecTy, NumElts / 2);
3001
3002 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3003 }
3004
3005 return SDValue();
3006}
3007
3008SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3009 SelectionDAG &DAG) const {
3010 SDLoc DL(Op);
3011 MVT ResVT = Op.getSimpleValueType();
3012 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3013
3014 unsigned NumOperands = Op.getNumOperands();
3015 unsigned NumFreezeUndef = 0;
3016 unsigned NumZero = 0;
3017 unsigned NumNonZero = 0;
3018 unsigned NonZeros = 0;
3019 SmallSet<SDValue, 4> Undefs;
3020 for (unsigned i = 0; i != NumOperands; ++i) {
3021 SDValue SubVec = Op.getOperand(i);
3022 if (SubVec.isUndef())
3023 continue;
3024 if (ISD::isFreezeUndef(SubVec.getNode())) {
3025 // If the freeze(undef) has multiple uses then we must fold to zero.
3026 if (SubVec.hasOneUse()) {
3027 ++NumFreezeUndef;
3028 } else {
3029 ++NumZero;
3030 Undefs.insert(SubVec);
3031 }
3032 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3033 ++NumZero;
3034 else {
3035 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3036 NonZeros |= 1 << i;
3037 ++NumNonZero;
3038 }
3039 }
3040
3041 // If we have more than 2 non-zeros, build each half separately.
3042 if (NumNonZero > 2) {
3043 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3044 ArrayRef<SDUse> Ops = Op->ops();
3045 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3046 Ops.slice(0, NumOperands / 2));
3047 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3048 Ops.slice(NumOperands / 2));
3049 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3050 }
3051
3052 // Otherwise, build it up through insert_subvectors.
3053 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3054 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3055 : DAG.getUNDEF(ResVT));
3056
3057 // Replace Undef operands with ZeroVector.
3058 for (SDValue U : Undefs)
3059 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3060
3061 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3062 unsigned NumSubElems = SubVT.getVectorNumElements();
3063 for (unsigned i = 0; i != NumOperands; ++i) {
3064 if ((NonZeros & (1 << i)) == 0)
3065 continue;
3066
3067 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3068 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3069 }
3070
3071 return Vec;
3072}
3073
3074SDValue
3075LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3076 SelectionDAG &DAG) const {
3077 MVT EltVT = Op.getSimpleValueType();
3078 SDValue Vec = Op->getOperand(0);
3079 EVT VecTy = Vec->getValueType(0);
3080 SDValue Idx = Op->getOperand(1);
3081 SDLoc DL(Op);
3082 MVT GRLenVT = Subtarget.getGRLenVT();
3083
3084 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3085
3086 if (isa<ConstantSDNode>(Idx))
3087 return Op;
3088
3089 switch (VecTy.getSimpleVT().SimpleTy) {
3090 default:
3091 llvm_unreachable("Unexpected type");
3092 case MVT::v32i8:
3093 case MVT::v16i16:
3094 case MVT::v4i64:
3095 case MVT::v4f64: {
3096 // Extract the high-half subvector and place it in the low half of a new
3097 // vector. It doesn't matter what the high half of the new vector is.
3098 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3099 SDValue VecHi =
3100 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3101 SDValue TmpVec =
3102 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3103 VecHi, DAG.getConstant(0, DL, GRLenVT));
3104
3105 // Shuffle the original Vec and the TmpVec using MaskVec; the lowest element
3106 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3107 // desired element.
3108 SDValue IdxCp =
3109 Subtarget.is64Bit()
3110 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3111 : DAG.getBitcast(MVT::f32, Idx);
3112 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3113 SDValue MaskVec =
3114 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3115 SDValue ResVec =
3116 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3117
3118 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3119 DAG.getConstant(0, DL, GRLenVT));
3120 }
3121 case MVT::v8i32:
3122 case MVT::v8f32: {
3123 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3124 SDValue SplatValue =
3125 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3126
3127 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3128 DAG.getConstant(0, DL, GRLenVT));
3129 }
3130 }
3131}
3132
3133SDValue
3134LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3135 SelectionDAG &DAG) const {
3136 MVT VT = Op.getSimpleValueType();
3137 MVT EltVT = VT.getVectorElementType();
3138 unsigned NumElts = VT.getVectorNumElements();
3139 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3140 SDLoc DL(Op);
3141 SDValue Op0 = Op.getOperand(0);
3142 SDValue Op1 = Op.getOperand(1);
3143 SDValue Op2 = Op.getOperand(2);
3144
3145 if (isa<ConstantSDNode>(Op2))
3146 return Op;
3147
3148 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3149 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3150
3151 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3152 return SDValue();
3153
3154 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3155 SmallVector<SDValue, 32> RawIndices;
3156 SDValue SplatIdx;
3157 SDValue Indices;
3158
3159 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3160 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3161 for (unsigned i = 0; i < NumElts; ++i) {
3162 RawIndices.push_back(Op2);
3163 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3164 }
3165 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3166 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3167
3168 RawIndices.clear();
3169 for (unsigned i = 0; i < NumElts; ++i) {
3170 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3171 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3172 }
3173 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3174 Indices = DAG.getBitcast(IdxVTy, Indices);
3175 } else {
3176 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3177
3178 for (unsigned i = 0; i < NumElts; ++i)
3179 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3180 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3181 }
3182
3183 // insert vec, elt, idx
3184 // =>
3185 // select (splatidx == {0,1,2...}) ? splatelt : vec
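  // Illustrative example (editorial addition): inserting %x into a v4i32 %v at
  // a runtime index %i becomes
  //   vselect (splat(%i) == <0, 1, 2, 3>), splat(%x), %v
  // so only the lane whose index equals %i receives %x.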
3186 SDValue SelectCC =
3187 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3188 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3189}
3190
3191SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3192 SelectionDAG &DAG) const {
3193 SDLoc DL(Op);
3194 SyncScope::ID FenceSSID =
3195 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3196
3197 // singlethread fences only synchronize with signal handlers on the same
3198 // thread and thus only need to preserve instruction order, not actually
3199 // enforce memory ordering.
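  // For example (editorial addition), IR such as
  //   fence syncscope("singlethread") seq_cst
  // takes this path and emits no machine instruction.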
3200 if (FenceSSID == SyncScope::SingleThread)
3201 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3202 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3203
3204 return Op;
3205}
3206
3207SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3208 SelectionDAG &DAG) const {
3209
3210 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3211 DAG.getContext()->emitError(
3212 "On LA64, only 64-bit registers can be written.");
3213 return Op.getOperand(0);
3214 }
3215
3216 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3217 DAG.getContext()->emitError(
3218 "On LA32, only 32-bit registers can be written.");
3219 return Op.getOperand(0);
3220 }
3221
3222 return Op;
3223}
3224
3225SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3226 SelectionDAG &DAG) const {
3227 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3228 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3229 "be a constant integer");
3230 return SDValue();
3231 }
3232
3233 MachineFunction &MF = DAG.getMachineFunction();
3234 MF.getFrameInfo().setFrameAddressIsTaken(true);
3235 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3236 EVT VT = Op.getValueType();
3237 SDLoc DL(Op);
3238 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3239 unsigned Depth = Op.getConstantOperandVal(0);
3240 int GRLenInBytes = Subtarget.getGRLen() / 8;
3241
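  // Editorial note: each iteration loads the caller's saved frame pointer,
  // which the prologue stores at fp - 2 * GRLenInBytes (just below the saved
  // return address) in the standard LoongArch frame layout.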
3242 while (Depth--) {
3243 int Offset = -(GRLenInBytes * 2);
3244 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3245 DAG.getSignedConstant(Offset, DL, VT));
3246 FrameAddr =
3247 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3248 }
3249 return FrameAddr;
3250}
3251
3252SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3253 SelectionDAG &DAG) const {
3254 // We currently only support lowering the return address for the current frame.
3255 if (Op.getConstantOperandVal(0) != 0) {
3256 DAG.getContext()->emitError(
3257 "return address can only be determined for the current frame");
3258 return SDValue();
3259 }
3260
3261 MachineFunction &MF = DAG.getMachineFunction();
3262 MF.getFrameInfo().setReturnAddressIsTaken(true);
3263 MVT GRLenVT = Subtarget.getGRLenVT();
3264
3265 // Return the value of the return address register, marking it an implicit
3266 // live-in.
3267 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3268 getRegClassFor(GRLenVT));
3269 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3270}
3271
3272SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3273 SelectionDAG &DAG) const {
3274 MachineFunction &MF = DAG.getMachineFunction();
3275 auto Size = Subtarget.getGRLen() / 8;
3276 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3277 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3278}
3279
3280SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3281 SelectionDAG &DAG) const {
3282 MachineFunction &MF = DAG.getMachineFunction();
3283 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3284
3285 SDLoc DL(Op);
3286 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3287 getPointerTy(MF.getDataLayout()));
3288
3289 // vastart just stores the address of the VarArgsFrameIndex slot into the
3290 // memory location argument.
3291 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3292 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3293 MachinePointerInfo(SV));
3294}
3295
3296SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3297 SelectionDAG &DAG) const {
3298 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3299 !Subtarget.hasBasicD() && "unexpected target features");
3300
3301 SDLoc DL(Op);
3302 SDValue Op0 = Op.getOperand(0);
3303 if (Op0->getOpcode() == ISD::AND) {
3304 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3305 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3306 return Op;
3307 }
3308
3309 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3310 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3311 Op0.getConstantOperandVal(2) == UINT64_C(0))
3312 return Op;
3313
3314 if (Op0.getOpcode() == ISD::AssertZext &&
3315 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3316 return Op;
3317
3318 EVT OpVT = Op0.getValueType();
3319 EVT RetVT = Op.getValueType();
3320 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3321 MakeLibCallOptions CallOptions;
3322 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3323 SDValue Chain = SDValue();
3324 SDValue Result;
3325 std::tie(Result, Chain) =
3326 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3327 return Result;
3328}
3329
3330SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3331 SelectionDAG &DAG) const {
3332 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3333 !Subtarget.hasBasicD() && "unexpected target features");
3334
3335 SDLoc DL(Op);
3336 SDValue Op0 = Op.getOperand(0);
3337
3338 if ((Op0.getOpcode() == ISD::AssertSext ||
3339 Op0.getOpcode() == ISD::AssertZext) &&
3340 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3341 return Op;
3342
3343 EVT OpVT = Op0.getValueType();
3344 EVT RetVT = Op.getValueType();
3345 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3346 MakeLibCallOptions CallOptions;
3347 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3348 SDValue Chain = SDValue();
3349 SDValue Result;
3350 std::tie(Result, Chain) =
3351 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3352 return Result;
3353}
3354
3355SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3356 SelectionDAG &DAG) const {
3357
3358 SDLoc DL(Op);
3359 EVT VT = Op.getValueType();
3360 SDValue Op0 = Op.getOperand(0);
3361 EVT Op0VT = Op0.getValueType();
3362
3363 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3364 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3365 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3366 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3367 }
3368 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3369 SDValue Lo, Hi;
3370 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3371 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3372 }
3373 return Op;
3374}
3375
3376SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3377 SelectionDAG &DAG) const {
3378
3379 SDLoc DL(Op);
3380 SDValue Op0 = Op.getOperand(0);
3381
3382 if (Op0.getValueType() == MVT::f16)
3383 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3384
3385 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3386 !Subtarget.hasBasicD()) {
3387 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3388 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3389 }
3390
3391 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3392 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3393 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3394}
3395
3396 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3397 SelectionDAG &DAG, unsigned Flags) {
3398 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3399}
3400
3401 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3402 SelectionDAG &DAG, unsigned Flags) {
3403 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3404 Flags);
3405}
3406
3407 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3408 SelectionDAG &DAG, unsigned Flags) {
3409 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3410 N->getOffset(), Flags);
3411}
3412
3413 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3414 SelectionDAG &DAG, unsigned Flags) {
3415 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3416}
3417
3418template <class NodeTy>
3419SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3420 CodeModel::Model M,
3421 bool IsLocal) const {
3422 SDLoc DL(N);
3423 EVT Ty = getPointerTy(DAG.getDataLayout());
3424 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3425 SDValue Load;
3426
3427 switch (M) {
3428 default:
3429 report_fatal_error("Unsupported code model");
3430
3431 case CodeModel::Large: {
3432 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3433
3434 // This is not actually used, but is necessary for successfully matching
3435 // the PseudoLA_*_LARGE nodes.
3436 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3437 if (IsLocal) {
3438 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3439 // eventually becomes the desired 5-insn code sequence.
3440 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3441 Tmp, Addr),
3442 0);
3443 } else {
3444 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3445 // eventually becomes the desired 5-insn code sequence.
3446 Load = SDValue(
3447 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3448 0);
3449 }
3450 break;
3451 }
3452
3453 case CodeModel::Small:
3454 case CodeModel::Medium:
3455 if (IsLocal) {
3456 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3457 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3458 Load = SDValue(
3459 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3460 } else {
3461 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3462 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3463 Load =
3464 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3465 }
3466 }
3467
3468 if (!IsLocal) {
3469 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3470 MachineFunction &MF = DAG.getMachineFunction();
3471 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3475 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3476 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3477 }
3478
3479 return Load;
3480}
3481
3482SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3483 SelectionDAG &DAG) const {
3484 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3485 DAG.getTarget().getCodeModel());
3486}
3487
3488SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3489 SelectionDAG &DAG) const {
3490 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3491 DAG.getTarget().getCodeModel());
3492}
3493
3494SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3495 SelectionDAG &DAG) const {
3496 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3497 DAG.getTarget().getCodeModel());
3498}
3499
3500SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3501 SelectionDAG &DAG) const {
3502 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3503 assert(N->getOffset() == 0 && "unexpected offset in global node");
3504 auto CM = DAG.getTarget().getCodeModel();
3505 const GlobalValue *GV = N->getGlobal();
3506
3507 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3508 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3509 CM = *GCM;
3510 }
3511
3512 return getAddr(N, DAG, CM, GV->isDSOLocal());
3513}
3514
3515SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3516 SelectionDAG &DAG,
3517 unsigned Opc, bool UseGOT,
3518 bool Large) const {
3519 SDLoc DL(N);
3520 EVT Ty = getPointerTy(DAG.getDataLayout());
3521 MVT GRLenVT = Subtarget.getGRLenVT();
3522
3523 // This is not actually used, but is necessary for successfully matching the
3524 // PseudoLA_*_LARGE nodes.
3525 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3526 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3527
3528 // Only IE needs an extra argument for large code model.
3529 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3530 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3531 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3532
3533  // If it is LE for the normal/medium code model, the add with the thread
3534  // pointer (tp) will be inserted during the pseudo-instruction expansion.
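  // (Illustrative only, assuming the usual %le_hi20/%le_lo12 relocations:
  //  lu12i.w $rd, %le_hi20(sym); ori $rd, $rd, %le_lo12(sym); the add with
  //  $tp is then appended by that expansion.)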
3535 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3536 return Offset;
3537
3538 if (UseGOT) {
3539 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3540 MachineFunction &MF = DAG.getMachineFunction();
3541 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3545 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3546 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3547 }
3548
3549 // Add the thread pointer.
3550 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3551 DAG.getRegister(LoongArch::R2, GRLenVT));
3552}
3553
3554SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3555 SelectionDAG &DAG,
3556 unsigned Opc,
3557 bool Large) const {
3558 SDLoc DL(N);
3559 EVT Ty = getPointerTy(DAG.getDataLayout());
3560 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3561
3562 // This is not actually used, but is necessary for successfully matching the
3563 // PseudoLA_*_LARGE nodes.
3564 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3565
3566 // Use a PC-relative addressing mode to access the dynamic GOT address.
3567 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3568 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3569 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3570
3571 // Prepare argument list to generate call.
3572  ArgListTy Args;
3573  Args.emplace_back(Load, CallTy);
3574
3575 // Setup call to __tls_get_addr.
3576 TargetLowering::CallLoweringInfo CLI(DAG);
3577 CLI.setDebugLoc(DL)
3578 .setChain(DAG.getEntryNode())
3579 .setLibCallee(CallingConv::C, CallTy,
3580 DAG.getExternalSymbol("__tls_get_addr", Ty),
3581 std::move(Args));
3582
3583 return LowerCallTo(CLI).first;
3584}
3585
3586SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3587 SelectionDAG &DAG, unsigned Opc,
3588 bool Large) const {
3589 SDLoc DL(N);
3590 EVT Ty = getPointerTy(DAG.getDataLayout());
3591 const GlobalValue *GV = N->getGlobal();
3592
3593 // This is not actually used, but is necessary for successfully matching the
3594 // PseudoLA_*_LARGE nodes.
3595 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3596
3597 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3598 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3599 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3600 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3601 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3602}
3603
3604SDValue
3605LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3606 SelectionDAG &DAG) const {
3607  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3608      CallingConv::GHC)
3609    report_fatal_error("In GHC calling convention TLS is not supported");
3610
3611 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3612 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3613
3614 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3615 assert(N->getOffset() == 0 && "unexpected offset in global node");
3616
3617 if (DAG.getTarget().useEmulatedTLS())
3618 reportFatalUsageError("the emulated TLS is prohibited");
3619
3620 bool IsDesc = DAG.getTarget().useTLSDESC();
3621
3622 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3623  case TLSModel::GeneralDynamic:
3624    // In this model, application code calls the dynamic linker function
3625 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3626 // runtime.
3627 if (!IsDesc)
3628 return getDynamicTLSAddr(N, DAG,
3629 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3630 : LoongArch::PseudoLA_TLS_GD,
3631 Large);
3632 break;
3633  case TLSModel::LocalDynamic:
3634    // Same as GeneralDynamic, except for assembly modifiers and relocation
3635 // records.
3636 if (!IsDesc)
3637 return getDynamicTLSAddr(N, DAG,
3638 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3639 : LoongArch::PseudoLA_TLS_LD,
3640 Large);
3641 break;
3642  case TLSModel::InitialExec:
3643    // This model uses the GOT to resolve TLS offsets.
3644 return getStaticTLSAddr(N, DAG,
3645 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3646 : LoongArch::PseudoLA_TLS_IE,
3647 /*UseGOT=*/true, Large);
3648  case TLSModel::LocalExec:
3649    // This model is used for static linking, as the TLS offsets are resolved
3650    // during program linking.
3651 //
3652 // This node doesn't need an extra argument for the large code model.
3653 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3654 /*UseGOT=*/false, Large);
3655 }
3656
3657 return getTLSDescAddr(N, DAG,
3658 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3659 : LoongArch::PseudoLA_TLS_DESC,
3660 Large);
3661}
3662
3663template <unsigned N>
3664static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3665                                    SelectionDAG &DAG, bool IsSigned = false) {
3666 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3667 // Check the ImmArg.
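  // For example, checkIntrinsicImmArg<5>(Op, 2, DAG) rejects the call when
  // operand 2 does not fit in an unsigned 5-bit immediate; passing
  // IsSigned=true checks the signed 5-bit range instead.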
3668 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3669 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3670 DAG.getContext()->emitError(Op->getOperationName(0) +
3671 ": argument out of range.");
3672 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3673 }
3674 return SDValue();
3675}
3676
3677SDValue
3678LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3679 SelectionDAG &DAG) const {
3680 switch (Op.getConstantOperandVal(0)) {
3681 default:
3682 return SDValue(); // Don't custom lower most intrinsics.
3683 case Intrinsic::thread_pointer: {
3684 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3685 return DAG.getRegister(LoongArch::R2, PtrVT);
3686 }
3687 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3688 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3689 case Intrinsic::loongarch_lsx_vreplvei_d:
3690 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3691 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3692 case Intrinsic::loongarch_lsx_vreplvei_w:
3693 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3694 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3695 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3696 case Intrinsic::loongarch_lasx_xvpickve_d:
3697 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3698 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3699 case Intrinsic::loongarch_lasx_xvinsve0_d:
3700 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3701 case Intrinsic::loongarch_lsx_vsat_b:
3702 case Intrinsic::loongarch_lsx_vsat_bu:
3703 case Intrinsic::loongarch_lsx_vrotri_b:
3704 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3705 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3706 case Intrinsic::loongarch_lsx_vsrlri_b:
3707 case Intrinsic::loongarch_lsx_vsrari_b:
3708 case Intrinsic::loongarch_lsx_vreplvei_h:
3709 case Intrinsic::loongarch_lasx_xvsat_b:
3710 case Intrinsic::loongarch_lasx_xvsat_bu:
3711 case Intrinsic::loongarch_lasx_xvrotri_b:
3712 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3713 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3714 case Intrinsic::loongarch_lasx_xvsrlri_b:
3715 case Intrinsic::loongarch_lasx_xvsrari_b:
3716 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3717 case Intrinsic::loongarch_lasx_xvpickve_w:
3718 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3719 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3720 case Intrinsic::loongarch_lasx_xvinsve0_w:
3721 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3722 case Intrinsic::loongarch_lsx_vsat_h:
3723 case Intrinsic::loongarch_lsx_vsat_hu:
3724 case Intrinsic::loongarch_lsx_vrotri_h:
3725 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3726 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3727 case Intrinsic::loongarch_lsx_vsrlri_h:
3728 case Intrinsic::loongarch_lsx_vsrari_h:
3729 case Intrinsic::loongarch_lsx_vreplvei_b:
3730 case Intrinsic::loongarch_lasx_xvsat_h:
3731 case Intrinsic::loongarch_lasx_xvsat_hu:
3732 case Intrinsic::loongarch_lasx_xvrotri_h:
3733 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3734 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3735 case Intrinsic::loongarch_lasx_xvsrlri_h:
3736 case Intrinsic::loongarch_lasx_xvsrari_h:
3737 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3738 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3739 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3740 case Intrinsic::loongarch_lsx_vsrani_b_h:
3741 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3742 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3743 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3744 case Intrinsic::loongarch_lsx_vssrani_b_h:
3745 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3746 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3747 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3748 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3749 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3750 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3751 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3752 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3753 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3754 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3755 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3756 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3757 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3758 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3759 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3760 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3761 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3762 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3763 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3764 case Intrinsic::loongarch_lsx_vsat_w:
3765 case Intrinsic::loongarch_lsx_vsat_wu:
3766 case Intrinsic::loongarch_lsx_vrotri_w:
3767 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3768 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3769 case Intrinsic::loongarch_lsx_vsrlri_w:
3770 case Intrinsic::loongarch_lsx_vsrari_w:
3771 case Intrinsic::loongarch_lsx_vslei_bu:
3772 case Intrinsic::loongarch_lsx_vslei_hu:
3773 case Intrinsic::loongarch_lsx_vslei_wu:
3774 case Intrinsic::loongarch_lsx_vslei_du:
3775 case Intrinsic::loongarch_lsx_vslti_bu:
3776 case Intrinsic::loongarch_lsx_vslti_hu:
3777 case Intrinsic::loongarch_lsx_vslti_wu:
3778 case Intrinsic::loongarch_lsx_vslti_du:
3779 case Intrinsic::loongarch_lsx_vbsll_v:
3780 case Intrinsic::loongarch_lsx_vbsrl_v:
3781 case Intrinsic::loongarch_lasx_xvsat_w:
3782 case Intrinsic::loongarch_lasx_xvsat_wu:
3783 case Intrinsic::loongarch_lasx_xvrotri_w:
3784 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3785 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3786 case Intrinsic::loongarch_lasx_xvsrlri_w:
3787 case Intrinsic::loongarch_lasx_xvsrari_w:
3788 case Intrinsic::loongarch_lasx_xvslei_bu:
3789 case Intrinsic::loongarch_lasx_xvslei_hu:
3790 case Intrinsic::loongarch_lasx_xvslei_wu:
3791 case Intrinsic::loongarch_lasx_xvslei_du:
3792 case Intrinsic::loongarch_lasx_xvslti_bu:
3793 case Intrinsic::loongarch_lasx_xvslti_hu:
3794 case Intrinsic::loongarch_lasx_xvslti_wu:
3795 case Intrinsic::loongarch_lasx_xvslti_du:
3796 case Intrinsic::loongarch_lasx_xvbsll_v:
3797 case Intrinsic::loongarch_lasx_xvbsrl_v:
3798 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3799 case Intrinsic::loongarch_lsx_vseqi_b:
3800 case Intrinsic::loongarch_lsx_vseqi_h:
3801 case Intrinsic::loongarch_lsx_vseqi_w:
3802 case Intrinsic::loongarch_lsx_vseqi_d:
3803 case Intrinsic::loongarch_lsx_vslei_b:
3804 case Intrinsic::loongarch_lsx_vslei_h:
3805 case Intrinsic::loongarch_lsx_vslei_w:
3806 case Intrinsic::loongarch_lsx_vslei_d:
3807 case Intrinsic::loongarch_lsx_vslti_b:
3808 case Intrinsic::loongarch_lsx_vslti_h:
3809 case Intrinsic::loongarch_lsx_vslti_w:
3810 case Intrinsic::loongarch_lsx_vslti_d:
3811 case Intrinsic::loongarch_lasx_xvseqi_b:
3812 case Intrinsic::loongarch_lasx_xvseqi_h:
3813 case Intrinsic::loongarch_lasx_xvseqi_w:
3814 case Intrinsic::loongarch_lasx_xvseqi_d:
3815 case Intrinsic::loongarch_lasx_xvslei_b:
3816 case Intrinsic::loongarch_lasx_xvslei_h:
3817 case Intrinsic::loongarch_lasx_xvslei_w:
3818 case Intrinsic::loongarch_lasx_xvslei_d:
3819 case Intrinsic::loongarch_lasx_xvslti_b:
3820 case Intrinsic::loongarch_lasx_xvslti_h:
3821 case Intrinsic::loongarch_lasx_xvslti_w:
3822 case Intrinsic::loongarch_lasx_xvslti_d:
3823 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3824 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3825 case Intrinsic::loongarch_lsx_vsrani_h_w:
3826 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3827 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3828 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3829 case Intrinsic::loongarch_lsx_vssrani_h_w:
3830 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3831 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3832 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3833 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3834 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3835 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3836 case Intrinsic::loongarch_lsx_vfrstpi_b:
3837 case Intrinsic::loongarch_lsx_vfrstpi_h:
3838 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3839 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3840 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3841 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3842 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3843 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3844 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3845 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3846 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3847 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3848 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3849 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3850 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3851 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3852 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3853 case Intrinsic::loongarch_lsx_vsat_d:
3854 case Intrinsic::loongarch_lsx_vsat_du:
3855 case Intrinsic::loongarch_lsx_vrotri_d:
3856 case Intrinsic::loongarch_lsx_vsrlri_d:
3857 case Intrinsic::loongarch_lsx_vsrari_d:
3858 case Intrinsic::loongarch_lasx_xvsat_d:
3859 case Intrinsic::loongarch_lasx_xvsat_du:
3860 case Intrinsic::loongarch_lasx_xvrotri_d:
3861 case Intrinsic::loongarch_lasx_xvsrlri_d:
3862 case Intrinsic::loongarch_lasx_xvsrari_d:
3863 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3864 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3865 case Intrinsic::loongarch_lsx_vsrani_w_d:
3866 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3867 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3868 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3869 case Intrinsic::loongarch_lsx_vssrani_w_d:
3870 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3871 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3872 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3873 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3874 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3875 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3876 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3877 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3878 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3879 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3880 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3881 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3882 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3883 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3884 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3885 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3886 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3887 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3888 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3889 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3890 case Intrinsic::loongarch_lsx_vsrani_d_q:
3891 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3892 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3893 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3894 case Intrinsic::loongarch_lsx_vssrani_d_q:
3895 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3896 case Intrinsic::loongarch_lsx_vssrani_du_q:
3897 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3898 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3899 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3900 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3901 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3902 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3903 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3904 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3905 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3906 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3907 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3908 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3909 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3910 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3911 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3912 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3913 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3914 case Intrinsic::loongarch_lsx_vnori_b:
3915 case Intrinsic::loongarch_lsx_vshuf4i_b:
3916 case Intrinsic::loongarch_lsx_vshuf4i_h:
3917 case Intrinsic::loongarch_lsx_vshuf4i_w:
3918 case Intrinsic::loongarch_lasx_xvnori_b:
3919 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3920 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3921 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3922 case Intrinsic::loongarch_lasx_xvpermi_d:
3923 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3924 case Intrinsic::loongarch_lsx_vshuf4i_d:
3925 case Intrinsic::loongarch_lsx_vpermi_w:
3926 case Intrinsic::loongarch_lsx_vbitseli_b:
3927 case Intrinsic::loongarch_lsx_vextrins_b:
3928 case Intrinsic::loongarch_lsx_vextrins_h:
3929 case Intrinsic::loongarch_lsx_vextrins_w:
3930 case Intrinsic::loongarch_lsx_vextrins_d:
3931 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3932 case Intrinsic::loongarch_lasx_xvpermi_w:
3933 case Intrinsic::loongarch_lasx_xvpermi_q:
3934 case Intrinsic::loongarch_lasx_xvbitseli_b:
3935 case Intrinsic::loongarch_lasx_xvextrins_b:
3936 case Intrinsic::loongarch_lasx_xvextrins_h:
3937 case Intrinsic::loongarch_lasx_xvextrins_w:
3938 case Intrinsic::loongarch_lasx_xvextrins_d:
3939 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3940 case Intrinsic::loongarch_lsx_vrepli_b:
3941 case Intrinsic::loongarch_lsx_vrepli_h:
3942 case Intrinsic::loongarch_lsx_vrepli_w:
3943 case Intrinsic::loongarch_lsx_vrepli_d:
3944 case Intrinsic::loongarch_lasx_xvrepli_b:
3945 case Intrinsic::loongarch_lasx_xvrepli_h:
3946 case Intrinsic::loongarch_lasx_xvrepli_w:
3947 case Intrinsic::loongarch_lasx_xvrepli_d:
3948 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3949 case Intrinsic::loongarch_lsx_vldi:
3950 case Intrinsic::loongarch_lasx_xvldi:
3951 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3952 }
3953}
3954
3955// Helper function that emits an error message for intrinsics with a chain and
3956// returns the merge values of a UNDEF and the chain.
3957static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3958                                                  StringRef ErrorMsg,
3959 SelectionDAG &DAG) {
3960 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3961 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3962 SDLoc(Op));
3963}
3964
3965SDValue
3966LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3967 SelectionDAG &DAG) const {
3968 SDLoc DL(Op);
3969 MVT GRLenVT = Subtarget.getGRLenVT();
3970 EVT VT = Op.getValueType();
3971 SDValue Chain = Op.getOperand(0);
3972 const StringRef ErrorMsgOOR = "argument out of range";
3973 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3974 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3975
3976 switch (Op.getConstantOperandVal(1)) {
3977 default:
3978 return Op;
3979 case Intrinsic::loongarch_crc_w_b_w:
3980 case Intrinsic::loongarch_crc_w_h_w:
3981 case Intrinsic::loongarch_crc_w_w_w:
3982 case Intrinsic::loongarch_crc_w_d_w:
3983 case Intrinsic::loongarch_crcc_w_b_w:
3984 case Intrinsic::loongarch_crcc_w_h_w:
3985 case Intrinsic::loongarch_crcc_w_w_w:
3986 case Intrinsic::loongarch_crcc_w_d_w:
3987 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3988 case Intrinsic::loongarch_csrrd_w:
3989 case Intrinsic::loongarch_csrrd_d: {
3990 unsigned Imm = Op.getConstantOperandVal(2);
3991 return !isUInt<14>(Imm)
3992 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3993 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3994 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3995 }
3996 case Intrinsic::loongarch_csrwr_w:
3997 case Intrinsic::loongarch_csrwr_d: {
3998 unsigned Imm = Op.getConstantOperandVal(3);
3999 return !isUInt<14>(Imm)
4000 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4001 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4002 {Chain, Op.getOperand(2),
4003 DAG.getConstant(Imm, DL, GRLenVT)});
4004 }
4005 case Intrinsic::loongarch_csrxchg_w:
4006 case Intrinsic::loongarch_csrxchg_d: {
4007 unsigned Imm = Op.getConstantOperandVal(4);
4008 return !isUInt<14>(Imm)
4009 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4010 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4011 {Chain, Op.getOperand(2), Op.getOperand(3),
4012 DAG.getConstant(Imm, DL, GRLenVT)});
4013 }
4014 case Intrinsic::loongarch_iocsrrd_d: {
4015 return DAG.getNode(
4016 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4017 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4018 }
4019#define IOCSRRD_CASE(NAME, NODE) \
4020 case Intrinsic::loongarch_##NAME: { \
4021 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4022 {Chain, Op.getOperand(2)}); \
4023 }
4024 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4025 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4026 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4027#undef IOCSRRD_CASE
4028 case Intrinsic::loongarch_cpucfg: {
4029 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4030 {Chain, Op.getOperand(2)});
4031 }
4032 case Intrinsic::loongarch_lddir_d: {
4033 unsigned Imm = Op.getConstantOperandVal(3);
4034 return !isUInt<8>(Imm)
4035 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4036 : Op;
4037 }
4038 case Intrinsic::loongarch_movfcsr2gr: {
4039 if (!Subtarget.hasBasicF())
4040 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4041 unsigned Imm = Op.getConstantOperandVal(2);
4042 return !isUInt<2>(Imm)
4043 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4044 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4045 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4046 }
4047 case Intrinsic::loongarch_lsx_vld:
4048 case Intrinsic::loongarch_lsx_vldrepl_b:
4049 case Intrinsic::loongarch_lasx_xvld:
4050 case Intrinsic::loongarch_lasx_xvldrepl_b:
4051 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4052 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4053 : SDValue();
4054 case Intrinsic::loongarch_lsx_vldrepl_h:
4055 case Intrinsic::loongarch_lasx_xvldrepl_h:
4056 return !isShiftedInt<11, 1>(
4057 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4058               ? emitIntrinsicWithChainErrorMessage(
4059                     Op, "argument out of range or not a multiple of 2", DAG)
4060 : SDValue();
4061 case Intrinsic::loongarch_lsx_vldrepl_w:
4062 case Intrinsic::loongarch_lasx_xvldrepl_w:
4063 return !isShiftedInt<10, 2>(
4064 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4065               ? emitIntrinsicWithChainErrorMessage(
4066                     Op, "argument out of range or not a multiple of 4", DAG)
4067 : SDValue();
4068 case Intrinsic::loongarch_lsx_vldrepl_d:
4069 case Intrinsic::loongarch_lasx_xvldrepl_d:
4070 return !isShiftedInt<9, 3>(
4071 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4072               ? emitIntrinsicWithChainErrorMessage(
4073                     Op, "argument out of range or not a multiple of 8", DAG)
4074 : SDValue();
4075 }
4076}
4077
4078// Helper function that emits an error message for intrinsics with a void
4079// return value and returns the chain.
4080static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4081                                         SelectionDAG &DAG) {
4082
4083 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4084 return Op.getOperand(0);
4085}
4086
4087SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4088 SelectionDAG &DAG) const {
4089 SDLoc DL(Op);
4090 MVT GRLenVT = Subtarget.getGRLenVT();
4091 SDValue Chain = Op.getOperand(0);
4092 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4093 SDValue Op2 = Op.getOperand(2);
4094 const StringRef ErrorMsgOOR = "argument out of range";
4095 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4096 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4097 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4098
4099 switch (IntrinsicEnum) {
4100 default:
4101 // TODO: Add more Intrinsics.
4102 return SDValue();
4103 case Intrinsic::loongarch_cacop_d:
4104 case Intrinsic::loongarch_cacop_w: {
4105 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4106 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4107 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4108 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4109 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4110 unsigned Imm1 = Op2->getAsZExtVal();
4111 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4112 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4113 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4114 return Op;
4115 }
4116 case Intrinsic::loongarch_dbar: {
4117 unsigned Imm = Op2->getAsZExtVal();
4118 return !isUInt<15>(Imm)
4119 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4120 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4121 DAG.getConstant(Imm, DL, GRLenVT));
4122 }
4123 case Intrinsic::loongarch_ibar: {
4124 unsigned Imm = Op2->getAsZExtVal();
4125 return !isUInt<15>(Imm)
4126 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4127 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4128 DAG.getConstant(Imm, DL, GRLenVT));
4129 }
4130 case Intrinsic::loongarch_break: {
4131 unsigned Imm = Op2->getAsZExtVal();
4132 return !isUInt<15>(Imm)
4133 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4134 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4135 DAG.getConstant(Imm, DL, GRLenVT));
4136 }
4137 case Intrinsic::loongarch_movgr2fcsr: {
4138 if (!Subtarget.hasBasicF())
4139 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4140 unsigned Imm = Op2->getAsZExtVal();
4141 return !isUInt<2>(Imm)
4142 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4143 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4144 DAG.getConstant(Imm, DL, GRLenVT),
4145 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4146 Op.getOperand(3)));
4147 }
4148 case Intrinsic::loongarch_syscall: {
4149 unsigned Imm = Op2->getAsZExtVal();
4150 return !isUInt<15>(Imm)
4151 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4152 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4153 DAG.getConstant(Imm, DL, GRLenVT));
4154 }
4155#define IOCSRWR_CASE(NAME, NODE) \
4156 case Intrinsic::loongarch_##NAME: { \
4157 SDValue Op3 = Op.getOperand(3); \
4158 return Subtarget.is64Bit() \
4159 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4160 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4161 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4162 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4163 Op3); \
4164 }
4165 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4166 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4167 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4168#undef IOCSRWR_CASE
4169 case Intrinsic::loongarch_iocsrwr_d: {
4170 return !Subtarget.is64Bit()
4171 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4172 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4173 Op2,
4174 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4175 Op.getOperand(3)));
4176 }
4177#define ASRT_LE_GT_CASE(NAME) \
4178 case Intrinsic::loongarch_##NAME: { \
4179 return !Subtarget.is64Bit() \
4180 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4181 : Op; \
4182 }
4183 ASRT_LE_GT_CASE(asrtle_d)
4184 ASRT_LE_GT_CASE(asrtgt_d)
4185#undef ASRT_LE_GT_CASE
4186 case Intrinsic::loongarch_ldpte_d: {
4187 unsigned Imm = Op.getConstantOperandVal(3);
4188 return !Subtarget.is64Bit()
4189 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4190 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4191 : Op;
4192 }
4193 case Intrinsic::loongarch_lsx_vst:
4194 case Intrinsic::loongarch_lasx_xvst:
4195 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4196 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4197 : SDValue();
4198 case Intrinsic::loongarch_lasx_xvstelm_b:
4199 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4200 !isUInt<5>(Op.getConstantOperandVal(5)))
4201 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4202 : SDValue();
4203 case Intrinsic::loongarch_lsx_vstelm_b:
4204 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4205 !isUInt<4>(Op.getConstantOperandVal(5)))
4206 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4207 : SDValue();
4208 case Intrinsic::loongarch_lasx_xvstelm_h:
4209 return (!isShiftedInt<8, 1>(
4210 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4211 !isUInt<4>(Op.getConstantOperandVal(5)))
4212               ? emitIntrinsicErrorMessage(
4213                     Op, "argument out of range or not a multiple of 2", DAG)
4214 : SDValue();
4215 case Intrinsic::loongarch_lsx_vstelm_h:
4216 return (!isShiftedInt<8, 1>(
4217 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4218 !isUInt<3>(Op.getConstantOperandVal(5)))
4219               ? emitIntrinsicErrorMessage(
4220                     Op, "argument out of range or not a multiple of 2", DAG)
4221 : SDValue();
4222 case Intrinsic::loongarch_lasx_xvstelm_w:
4223 return (!isShiftedInt<8, 2>(
4224 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4225 !isUInt<3>(Op.getConstantOperandVal(5)))
4226               ? emitIntrinsicErrorMessage(
4227                     Op, "argument out of range or not a multiple of 4", DAG)
4228 : SDValue();
4229 case Intrinsic::loongarch_lsx_vstelm_w:
4230 return (!isShiftedInt<8, 2>(
4231 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4232 !isUInt<2>(Op.getConstantOperandVal(5)))
4233               ? emitIntrinsicErrorMessage(
4234                     Op, "argument out of range or not a multiple of 4", DAG)
4235 : SDValue();
4236 case Intrinsic::loongarch_lasx_xvstelm_d:
4237 return (!isShiftedInt<8, 3>(
4238 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4239 !isUInt<2>(Op.getConstantOperandVal(5)))
4240               ? emitIntrinsicErrorMessage(
4241                     Op, "argument out of range or not a multiple of 8", DAG)
4242 : SDValue();
4243 case Intrinsic::loongarch_lsx_vstelm_d:
4244 return (!isShiftedInt<8, 3>(
4245 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4246 !isUInt<1>(Op.getConstantOperandVal(5)))
4247               ? emitIntrinsicErrorMessage(
4248                     Op, "argument out of range or not a multiple of 8", DAG)
4249 : SDValue();
4250 }
4251}
4252
4253SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4254 SelectionDAG &DAG) const {
4255 SDLoc DL(Op);
4256 SDValue Lo = Op.getOperand(0);
4257 SDValue Hi = Op.getOperand(1);
4258 SDValue Shamt = Op.getOperand(2);
4259 EVT VT = Lo.getValueType();
4260
4261 // if Shamt-GRLen < 0: // Shamt < GRLen
4262 // Lo = Lo << Shamt
4263 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4264 // else:
4265 // Lo = 0
4266 // Hi = Lo << (Shamt-GRLen)
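  //
  // For example (illustrative, GRLen=32): a 64-bit SHL by Shamt=40 gives
  // Lo = 0 and Hi = Lo_in << 8, while Shamt=8 gives Lo = Lo_in << 8 and
  // Hi = (Hi_in << 8) | (Lo_in >>u 24).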
4267
4268 SDValue Zero = DAG.getConstant(0, DL, VT);
4269 SDValue One = DAG.getConstant(1, DL, VT);
4270 SDValue MinusGRLen =
4271 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4272 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4273 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4274 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4275
4276 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4277 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4278 SDValue ShiftRightLo =
4279 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4280 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4281 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4282 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4283
4284 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4285
4286 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4287 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4288
4289 SDValue Parts[2] = {Lo, Hi};
4290 return DAG.getMergeValues(Parts, DL);
4291}
4292
4293SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4294 SelectionDAG &DAG,
4295 bool IsSRA) const {
4296 SDLoc DL(Op);
4297 SDValue Lo = Op.getOperand(0);
4298 SDValue Hi = Op.getOperand(1);
4299 SDValue Shamt = Op.getOperand(2);
4300 EVT VT = Lo.getValueType();
4301
4302 // SRA expansion:
4303 // if Shamt-GRLen < 0: // Shamt < GRLen
4304  //   Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
4305 // Hi = Hi >>s Shamt
4306 // else:
4307 // Lo = Hi >>s (Shamt-GRLen);
4308 // Hi = Hi >>s (GRLen-1)
4309 //
4310 // SRL expansion:
4311 // if Shamt-GRLen < 0: // Shamt < GRLen
4312  //   Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
4313 // Hi = Hi >>u Shamt
4314 // else:
4315 // Lo = Hi >>u (Shamt-GRLen);
4316 // Hi = 0;
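  //
  // For example (illustrative, GRLen=32): a 64-bit SRL by Shamt=8 gives
  // Lo = (Lo_in >>u 8) | (Hi_in << 24) and Hi = Hi_in >>u 8, while Shamt=40
  // gives Lo = Hi_in >>u 8 and Hi = 0 (Hi_in >>s 31 for SRA).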
4317
4318 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4319
4320 SDValue Zero = DAG.getConstant(0, DL, VT);
4321 SDValue One = DAG.getConstant(1, DL, VT);
4322 SDValue MinusGRLen =
4323 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4324 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4325 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4326 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4327
4328 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4329 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4330 SDValue ShiftLeftHi =
4331 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4332 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4333 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4334 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4335 SDValue HiFalse =
4336 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4337
4338 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4339
4340 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4341 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4342
4343 SDValue Parts[2] = {Lo, Hi};
4344 return DAG.getMergeValues(Parts, DL);
4345}
4346
4347// Returns the opcode of the target-specific SDNode that implements the 32-bit
4348// form of the given Opcode.
4349static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4350  switch (Opcode) {
4351 default:
4352 llvm_unreachable("Unexpected opcode");
4353 case ISD::SDIV:
4354 return LoongArchISD::DIV_W;
4355 case ISD::UDIV:
4356 return LoongArchISD::DIV_WU;
4357 case ISD::SREM:
4358 return LoongArchISD::MOD_W;
4359 case ISD::UREM:
4360 return LoongArchISD::MOD_WU;
4361 case ISD::SHL:
4362 return LoongArchISD::SLL_W;
4363 case ISD::SRA:
4364 return LoongArchISD::SRA_W;
4365 case ISD::SRL:
4366 return LoongArchISD::SRL_W;
4367 case ISD::ROTL:
4368 case ISD::ROTR:
4369 return LoongArchISD::ROTR_W;
4370 case ISD::CTTZ:
4371 return LoongArchISD::CTZ_W;
4372 case ISD::CTLZ:
4373 return LoongArchISD::CLZ_W;
4374 }
4375}
4376
4377// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4378// node. Because i8/i16/i32 are not legal types for LA64, these operations
4379// would otherwise be promoted to i64, making it difficult to select the
4380// SLL_W/.../*W instructions later, because the fact that the operation was
4381// originally of type i8/i16/i32 is lost.
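//
// For example (illustrative): an i32 SHL on LA64 becomes
//   (trunc (SLL_W (any_extend x), (any_extend y)))
// so that isel can still pick sll.w rather than the 64-bit sll.d.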
4382static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4383                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
4384 SDLoc DL(N);
4385 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4386 SDValue NewOp0, NewRes;
4387
4388 switch (NumOp) {
4389 default:
4390 llvm_unreachable("Unexpected NumOp");
4391 case 1: {
4392 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4393 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4394 break;
4395 }
4396 case 2: {
4397 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4398 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4399 if (N->getOpcode() == ISD::ROTL) {
4400 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4401 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4402 }
4403 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4404 break;
4405 }
4406  // TODO: Handle more NumOp cases.
4407 }
4408
4409 // ReplaceNodeResults requires we maintain the same type for the return
4410 // value.
4411 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4412}
4413
4414// Converts the given 32-bit operation to an i64 operation with sign-extension
4415// semantics in order to reduce the number of sign-extension instructions.
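//
// For example (illustrative): an i32 ADD on LA64 becomes
//   (trunc (sext_inreg (add (any_extend x), (any_extend y)), i32))
// which typically selects to a single add.w.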
4416static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4417  SDLoc DL(N);
4418 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4419 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4420 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4421 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4422 DAG.getValueType(MVT::i32));
4423 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4424}
4425
4426// Helper function that emits an error message for intrinsics with or without a
4427// chain, and returns a UNDEF and (if WithChain is set) the chain as the results.
4428static void emitErrorAndReplaceIntrinsicResults(
4429    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4430    StringRef ErrorMsg, bool WithChain = true) {
4431 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4432 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4433 if (!WithChain)
4434 return;
4435 Results.push_back(N->getOperand(0));
4436}
4437
4438template <unsigned N>
4439static void
4440replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4441                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4442 unsigned ResOp) {
4443 const StringRef ErrorMsgOOR = "argument out of range";
4444 unsigned Imm = Node->getConstantOperandVal(2);
4445 if (!isUInt<N>(Imm)) {
4446    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4447                                        /*WithChain=*/false);
4448 return;
4449 }
4450 SDLoc DL(Node);
4451 SDValue Vec = Node->getOperand(1);
4452
4453 SDValue PickElt =
4454 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4455 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4457 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4458 PickElt.getValue(0)));
4459}
4460
4461static void replaceVecCondBranchResults(SDNode *N,
4462                                        SmallVectorImpl<SDValue> &Results,
4463                                        SelectionDAG &DAG,
4464 const LoongArchSubtarget &Subtarget,
4465 unsigned ResOp) {
4466 SDLoc DL(N);
4467 SDValue Vec = N->getOperand(1);
4468
4469 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4470 Results.push_back(
4471 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4472}
4473
4474static void
4475replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4476                                 SelectionDAG &DAG,
4477 const LoongArchSubtarget &Subtarget) {
4478 switch (N->getConstantOperandVal(0)) {
4479 default:
4480 llvm_unreachable("Unexpected Intrinsic.");
4481 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4482 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4484 break;
4485 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4486 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4487 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4489 break;
4490 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4491 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4493 break;
4494 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4495 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4497 break;
4498 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4499 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4500 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4502 break;
4503 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4504 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4506 break;
4507 case Intrinsic::loongarch_lsx_bz_b:
4508 case Intrinsic::loongarch_lsx_bz_h:
4509 case Intrinsic::loongarch_lsx_bz_w:
4510 case Intrinsic::loongarch_lsx_bz_d:
4511 case Intrinsic::loongarch_lasx_xbz_b:
4512 case Intrinsic::loongarch_lasx_xbz_h:
4513 case Intrinsic::loongarch_lasx_xbz_w:
4514 case Intrinsic::loongarch_lasx_xbz_d:
4515 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4517 break;
4518 case Intrinsic::loongarch_lsx_bz_v:
4519 case Intrinsic::loongarch_lasx_xbz_v:
4520 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4522 break;
4523 case Intrinsic::loongarch_lsx_bnz_b:
4524 case Intrinsic::loongarch_lsx_bnz_h:
4525 case Intrinsic::loongarch_lsx_bnz_w:
4526 case Intrinsic::loongarch_lsx_bnz_d:
4527 case Intrinsic::loongarch_lasx_xbnz_b:
4528 case Intrinsic::loongarch_lasx_xbnz_h:
4529 case Intrinsic::loongarch_lasx_xbnz_w:
4530 case Intrinsic::loongarch_lasx_xbnz_d:
4531 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4533 break;
4534 case Intrinsic::loongarch_lsx_bnz_v:
4535 case Intrinsic::loongarch_lasx_xbnz_v:
4536 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4538 break;
4539 }
4540}
4541
4542static void replaceCMP_XCHG_128Results(SDNode *N,
4543                                       SmallVectorImpl<SDValue> &Results,
4544                                       SelectionDAG &DAG) {
4545 assert(N->getValueType(0) == MVT::i128 &&
4546 "AtomicCmpSwap on types less than 128 should be legal");
4547 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4548
4549 unsigned Opcode;
4550 switch (MemOp->getMergedOrdering()) {
4554 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4555 break;
4558 Opcode = LoongArch::PseudoCmpXchg128;
4559 break;
4560 default:
4561 llvm_unreachable("Unexpected ordering!");
4562 }
4563
4564 SDLoc DL(N);
4565 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4566 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4567 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4568 NewVal.first, NewVal.second, N->getOperand(0)};
4569
4570 SDNode *CmpSwap = DAG.getMachineNode(
4571 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4572 Ops);
4573 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4574 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4575 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4576 Results.push_back(SDValue(CmpSwap, 3));
4577}
4578
4579void LoongArchTargetLowering::ReplaceNodeResults(
4580    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4581  SDLoc DL(N);
4582 EVT VT = N->getValueType(0);
4583 switch (N->getOpcode()) {
4584 default:
4585 llvm_unreachable("Don't know how to legalize this operation");
4586 case ISD::ADD:
4587 case ISD::SUB:
4588 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4589 "Unexpected custom legalisation");
4590 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4591 break;
4592 case ISD::SDIV:
4593 case ISD::UDIV:
4594 case ISD::SREM:
4595 case ISD::UREM:
4596 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4597 "Unexpected custom legalisation");
4598 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4599 Subtarget.hasDiv32() && VT == MVT::i32
4600                                              ? ISD::ANY_EXTEND
4601                                              : ISD::SIGN_EXTEND));
4602 break;
4603 case ISD::SHL:
4604 case ISD::SRA:
4605 case ISD::SRL:
4606 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4607 "Unexpected custom legalisation");
4608 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4609 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4610 break;
4611 }
4612 break;
4613 case ISD::ROTL:
4614 case ISD::ROTR:
4615 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4616 "Unexpected custom legalisation");
4617 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4618 break;
4619 case ISD::FP_TO_SINT: {
4620 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4621 "Unexpected custom legalisation");
4622 SDValue Src = N->getOperand(0);
4623 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4624    if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4625        TargetLowering::TypeSoftenFloat) {
4626 if (!isTypeLegal(Src.getValueType()))
4627 return;
4628 if (Src.getValueType() == MVT::f16)
4629 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4630 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4631 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4632 return;
4633 }
4634 // If the FP type needs to be softened, emit a library call using the 'si'
4635 // version. If we left it to default legalization we'd end up with 'di'.
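    // (For instance, an f128 -> i32 conversion would then use __fixtfsi rather
    // than __fixtfdi; names given only to illustrate the 'si'/'di' suffixes.)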
4636 RTLIB::Libcall LC;
4637 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4638 MakeLibCallOptions CallOptions;
4639 EVT OpVT = Src.getValueType();
4640 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4641 SDValue Chain = SDValue();
4642 SDValue Result;
4643 std::tie(Result, Chain) =
4644 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4645 Results.push_back(Result);
4646 break;
4647 }
4648 case ISD::BITCAST: {
4649 SDValue Src = N->getOperand(0);
4650 EVT SrcVT = Src.getValueType();
4651 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4652 Subtarget.hasBasicF()) {
4653 SDValue Dst =
4654 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4655 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4656 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4658 DAG.getVTList(MVT::i32, MVT::i32), Src);
4659 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4660 NewReg.getValue(0), NewReg.getValue(1));
4661 Results.push_back(RetReg);
4662 }
4663 break;
4664 }
4665 case ISD::FP_TO_UINT: {
4666 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4667 "Unexpected custom legalisation");
4668 auto &TLI = DAG.getTargetLoweringInfo();
4669 SDValue Tmp1, Tmp2;
4670 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4671 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4672 break;
4673 }
4674 case ISD::BSWAP: {
4675 SDValue Src = N->getOperand(0);
4676 assert((VT == MVT::i16 || VT == MVT::i32) &&
4677 "Unexpected custom legalization");
4678 MVT GRLenVT = Subtarget.getGRLenVT();
4679 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4680 SDValue Tmp;
4681 switch (VT.getSizeInBits()) {
4682 default:
4683 llvm_unreachable("Unexpected operand width");
4684 case 16:
4685 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4686 break;
4687 case 32:
4688      // Only LA64 will get to here due to the size mismatch between VT and
4689      // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
4690 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4691 break;
4692 }
4693 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4694 break;
4695 }
4696 case ISD::BITREVERSE: {
4697 SDValue Src = N->getOperand(0);
4698 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4699 "Unexpected custom legalization");
4700 MVT GRLenVT = Subtarget.getGRLenVT();
4701 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4702 SDValue Tmp;
4703 switch (VT.getSizeInBits()) {
4704 default:
4705 llvm_unreachable("Unexpected operand width");
4706 case 8:
4707 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4708 break;
4709 case 32:
4710 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4711 break;
4712 }
4713 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4714 break;
4715 }
4716 case ISD::CTLZ:
4717 case ISD::CTTZ: {
4718 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4719 "Unexpected custom legalisation");
4720 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4721 break;
4722 }
4723  case ISD::INTRINSIC_W_CHAIN: {
4724    SDValue Chain = N->getOperand(0);
4725 SDValue Op2 = N->getOperand(2);
4726 MVT GRLenVT = Subtarget.getGRLenVT();
4727 const StringRef ErrorMsgOOR = "argument out of range";
4728 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4729 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4730
4731 switch (N->getConstantOperandVal(1)) {
4732 default:
4733 llvm_unreachable("Unexpected Intrinsic.");
4734 case Intrinsic::loongarch_movfcsr2gr: {
4735 if (!Subtarget.hasBasicF()) {
4736 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4737 return;
4738 }
4739 unsigned Imm = Op2->getAsZExtVal();
4740 if (!isUInt<2>(Imm)) {
4741 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4742 return;
4743 }
4744 SDValue MOVFCSR2GRResults = DAG.getNode(
4745 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4746 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4747 Results.push_back(
4748 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4749 Results.push_back(MOVFCSR2GRResults.getValue(1));
4750 break;
4751 }
4752#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4753 case Intrinsic::loongarch_##NAME: { \
4754 SDValue NODE = DAG.getNode( \
4755 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4756 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4757 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4758 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4759 Results.push_back(NODE.getValue(1)); \
4760 break; \
4761 }
4762 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4763 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4764 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4765 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4766 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4767 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4768#undef CRC_CASE_EXT_BINARYOP
4769
4770#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4771 case Intrinsic::loongarch_##NAME: { \
4772 SDValue NODE = DAG.getNode( \
4773 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4774 {Chain, Op2, \
4775 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4776 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4777 Results.push_back(NODE.getValue(1)); \
4778 break; \
4779 }
4780 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4781 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4782#undef CRC_CASE_EXT_UNARYOP
4783#define CSR_CASE(ID) \
4784 case Intrinsic::loongarch_##ID: { \
4785 if (!Subtarget.is64Bit()) \
4786 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4787 break; \
4788 }
4789 CSR_CASE(csrrd_d);
4790 CSR_CASE(csrwr_d);
4791 CSR_CASE(csrxchg_d);
4792 CSR_CASE(iocsrrd_d);
4793#undef CSR_CASE
4794 case Intrinsic::loongarch_csrrd_w: {
4795 unsigned Imm = Op2->getAsZExtVal();
4796 if (!isUInt<14>(Imm)) {
4797 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4798 return;
4799 }
4800 SDValue CSRRDResults =
4801 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4802 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4803 Results.push_back(
4804 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4805 Results.push_back(CSRRDResults.getValue(1));
4806 break;
4807 }
4808 case Intrinsic::loongarch_csrwr_w: {
4809 unsigned Imm = N->getConstantOperandVal(3);
4810 if (!isUInt<14>(Imm)) {
4811 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4812 return;
4813 }
4814 SDValue CSRWRResults =
4815 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4816 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4817 DAG.getConstant(Imm, DL, GRLenVT)});
4818 Results.push_back(
4819 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4820 Results.push_back(CSRWRResults.getValue(1));
4821 break;
4822 }
4823 case Intrinsic::loongarch_csrxchg_w: {
4824 unsigned Imm = N->getConstantOperandVal(4);
4825 if (!isUInt<14>(Imm)) {
4826 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4827 return;
4828 }
4829 SDValue CSRXCHGResults = DAG.getNode(
4830 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4831 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4832 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4833 DAG.getConstant(Imm, DL, GRLenVT)});
4834 Results.push_back(
4835 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4836 Results.push_back(CSRXCHGResults.getValue(1));
4837 break;
4838 }
4839#define IOCSRRD_CASE(NAME, NODE) \
4840 case Intrinsic::loongarch_##NAME: { \
4841 SDValue IOCSRRDResults = \
4842 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4843 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4844 Results.push_back( \
4845 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4846 Results.push_back(IOCSRRDResults.getValue(1)); \
4847 break; \
4848 }
4849 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4850 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4851 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4852#undef IOCSRRD_CASE
4853 case Intrinsic::loongarch_cpucfg: {
4854 SDValue CPUCFGResults =
4855 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4856 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4857 Results.push_back(
4858 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4859 Results.push_back(CPUCFGResults.getValue(1));
4860 break;
4861 }
4862 case Intrinsic::loongarch_lddir_d: {
4863 if (!Subtarget.is64Bit()) {
4864 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4865 return;
4866 }
4867 break;
4868 }
4869 }
4870 break;
4871 }
4872 case ISD::READ_REGISTER: {
4873 if (Subtarget.is64Bit())
4874 DAG.getContext()->emitError(
4875 "On LA64, only 64-bit registers can be read.");
4876 else
4877 DAG.getContext()->emitError(
4878 "On LA32, only 32-bit registers can be read.");
4879 Results.push_back(DAG.getUNDEF(VT));
4880 Results.push_back(N->getOperand(0));
4881 break;
4882 }
4883  case ISD::INTRINSIC_WO_CHAIN: {
4884    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4885 break;
4886 }
4887 case ISD::LROUND: {
4888 SDValue Op0 = N->getOperand(0);
4889 EVT OpVT = Op0.getValueType();
4890 RTLIB::Libcall LC =
4891 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4892 MakeLibCallOptions CallOptions;
4893 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4894 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4895 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4896 Results.push_back(Result);
4897 break;
4898 }
4899 case ISD::ATOMIC_CMP_SWAP: {
4900    replaceCMP_XCHG_128Results(N, Results, DAG);
4901    break;
4902 }
4903 case ISD::TRUNCATE: {
4904 MVT VT = N->getSimpleValueType(0);
4905 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4906 return;
4907
4908 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4909 SDValue In = N->getOperand(0);
4910 EVT InVT = In.getValueType();
4911 EVT InEltVT = InVT.getVectorElementType();
4912 EVT EltVT = VT.getVectorElementType();
4913 unsigned MinElts = VT.getVectorNumElements();
4914 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4915 unsigned InBits = InVT.getSizeInBits();
4916
4917 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4918 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4919 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4920 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4921 for (unsigned I = 0; I < MinElts; ++I)
4922 TruncMask[I] = Scale * I;
4923
4924 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4925 MVT SVT = In.getSimpleValueType().getScalarType();
4926 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4927 SDValue WidenIn =
4928 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4929 DAG.getVectorIdxConstant(0, DL));
4930 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4931 "Illegal vector type in truncation");
4932 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4933 Results.push_back(
4934 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4935 return;
4936 }
4937 }
4938
4939 break;
4940 }
4941 }
4942}
4943
4944static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4945                                 TargetLowering::DAGCombinerInfo &DCI,
4946                                 const LoongArchSubtarget &Subtarget) {
4947 if (DCI.isBeforeLegalizeOps())
4948 return SDValue();
4949
4950 SDValue FirstOperand = N->getOperand(0);
4951 SDValue SecondOperand = N->getOperand(1);
4952 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4953 EVT ValTy = N->getValueType(0);
4954 SDLoc DL(N);
4955 uint64_t lsb, msb;
4956 unsigned SMIdx, SMLen;
4957 ConstantSDNode *CN;
4958 SDValue NewOperand;
4959 MVT GRLenVT = Subtarget.getGRLenVT();
4960
4961 // BSTRPICK requires the 32S feature.
4962 if (!Subtarget.has32S())
4963 return SDValue();
4964
4965 // Op's second operand must be a shifted mask.
4966 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4967 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4968 return SDValue();
4969
4970 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4971 // Pattern match BSTRPICK.
4972     // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
4973 // => BSTRPICK $dst, $src, msb, lsb
4974 // where msb = lsb + len - 1
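    // e.g. (illustrative values) $dst = and (srl $src, 8), 0xff
    //      => BSTRPICK $dst, $src, 15, 8   (lsb = 8, len = 8, msb = 15)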
4975
4976 // The second operand of the shift must be an immediate.
4977 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4978 return SDValue();
4979
4980 lsb = CN->getZExtValue();
4981
4982 // Return if the shifted mask does not start at bit 0 or the sum of its
4983 // length and lsb exceeds the word's size.
4984 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4985 return SDValue();
4986
4987 NewOperand = FirstOperand.getOperand(0);
4988 } else {
4989 // Pattern match BSTRPICK.
4990     // $dst = and $src, (2**len - 1), if len > 12
4991 // => BSTRPICK $dst, $src, msb, lsb
4992 // where lsb = 0 and msb = len - 1
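    // e.g. (illustrative values) $dst = and $src, 0xffff
    //      => BSTRPICK $dst, $src, 15, 0   (len = 16, msb = 15)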
4993
4994 // If the mask is <= 0xfff, andi can be used instead.
4995 if (CN->getZExtValue() <= 0xfff)
4996 return SDValue();
4997
4998     // Return if the MSB exceeds the value's bit width.
4999 if (SMIdx + SMLen > ValTy.getSizeInBits())
5000 return SDValue();
5001
5002 if (SMIdx > 0) {
5003       // Omit if the constant has more than 2 uses. This is a conservative
5004 // decision. Whether it is a win depends on the HW microarchitecture.
5005 // However it should always be better for 1 and 2 uses.
5006 if (CN->use_size() > 2)
5007 return SDValue();
5008 // Return if the constant can be composed by a single LU12I.W.
5009 if ((CN->getZExtValue() & 0xfff) == 0)
5010 return SDValue();
5011       // Return if the constant can be composed by a single ADDI with
5012 // the zero register.
5013 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5014 return SDValue();
5015 }
5016
5017 lsb = SMIdx;
5018 NewOperand = FirstOperand;
5019 }
5020
5021 msb = lsb + SMLen - 1;
5022 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5023 DAG.getConstant(msb, DL, GRLenVT),
5024 DAG.getConstant(lsb, DL, GRLenVT));
5025 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5026 return NR0;
5027 // Try to optimize to
5028 // bstrpick $Rd, $Rs, msb, lsb
5029 // slli $Rd, $Rd, lsb
5030 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5031 DAG.getConstant(lsb, DL, GRLenVT));
5032}
5033
5036 const LoongArchSubtarget &Subtarget) {
5037 // BSTRPICK requires the 32S feature.
5038 if (!Subtarget.has32S())
5039 return SDValue();
5040
5041 if (DCI.isBeforeLegalizeOps())
5042 return SDValue();
5043
5044 // $dst = srl (and $src, Mask), Shamt
5045 // =>
5046 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5047 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
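  // e.g. (illustrative values) $dst = srl (and $src, 0xff00), 8
  //      => BSTRPICK $dst, $src, 15, 8   (MaskIdx = 8, MaskLen = 8, Shamt = 8)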
5048 //
5049
5050 SDValue FirstOperand = N->getOperand(0);
5051 ConstantSDNode *CN;
5052 EVT ValTy = N->getValueType(0);
5053 SDLoc DL(N);
5054 MVT GRLenVT = Subtarget.getGRLenVT();
5055 unsigned MaskIdx, MaskLen;
5056 uint64_t Shamt;
5057
5058 // The first operand must be an AND and the second operand of the AND must be
5059 // a shifted mask.
5060 if (FirstOperand.getOpcode() != ISD::AND ||
5061 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5062 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5063 return SDValue();
5064
5065 // The second operand (shift amount) must be an immediate.
5066 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5067 return SDValue();
5068
5069 Shamt = CN->getZExtValue();
5070 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5071 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5072 FirstOperand->getOperand(0),
5073 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5074 DAG.getConstant(Shamt, DL, GRLenVT));
5075
5076 return SDValue();
5077}
5078
5079// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5080// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5081static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5082 unsigned Depth) {
5083 // Limit recursion.
5085 return false;
5086 switch (Src.getOpcode()) {
5087 case ISD::SETCC:
5088 case ISD::TRUNCATE:
5089 return Src.getOperand(0).getValueSizeInBits() == Size;
5090 case ISD::FREEZE:
5091 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5092 case ISD::AND:
5093 case ISD::XOR:
5094 case ISD::OR:
5095 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5096 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5097 case ISD::SELECT:
5098 case ISD::VSELECT:
5099 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5100 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5101 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5102 case ISD::BUILD_VECTOR:
5103 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5104 ISD::isBuildVectorAllOnes(Src.getNode());
5105 }
5106 return false;
5107}
5108
5109// Helper to push sign extension of vXi1 SETCC result through bitops.
5111 SDValue Src, const SDLoc &DL) {
5112 switch (Src.getOpcode()) {
5113 case ISD::SETCC:
5114 case ISD::FREEZE:
5115 case ISD::TRUNCATE:
5116 case ISD::BUILD_VECTOR:
5117 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5118 case ISD::AND:
5119 case ISD::XOR:
5120 case ISD::OR:
5121 return DAG.getNode(
5122 Src.getOpcode(), DL, SExtVT,
5123 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5124 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5125 case ISD::SELECT:
5126 case ISD::VSELECT:
5127 return DAG.getSelect(
5128 DL, SExtVT, Src.getOperand(0),
5129 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5130 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5131 }
5132 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5133}
5134
5135static SDValue
5138 const LoongArchSubtarget &Subtarget) {
5139 SDLoc DL(N);
5140 EVT VT = N->getValueType(0);
5141 SDValue Src = N->getOperand(0);
5142 EVT SrcVT = Src.getValueType();
5143
5144 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5145 return SDValue();
5146
5147 bool UseLASX;
5148 unsigned Opc = ISD::DELETED_NODE;
5149 EVT CmpVT = Src.getOperand(0).getValueType();
5150 EVT EltVT = CmpVT.getVectorElementType();
5151
5152 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5153 UseLASX = false;
5154 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5155 CmpVT.getSizeInBits() == 256)
5156 UseLASX = true;
5157 else
5158 return SDValue();
5159
5160 SDValue SrcN1 = Src.getOperand(1);
5161 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5162 default:
5163 break;
5164 case ISD::SETEQ:
5165 // x == 0 => not (vmsknez.b x)
5166 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5168 break;
5169 case ISD::SETGT:
5170 // x > -1 => vmskgez.b x
5171 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5173 break;
5174 case ISD::SETGE:
5175 // x >= 0 => vmskgez.b x
5176 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5178 break;
5179 case ISD::SETLT:
5180 // x < 0 => vmskltz.{b,h,w,d} x
5181 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5182 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5183 EltVT == MVT::i64))
5185 break;
5186 case ISD::SETLE:
5187 // x <= -1 => vmskltz.{b,h,w,d} x
5188 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5189 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5190 EltVT == MVT::i64))
5192 break;
5193 case ISD::SETNE:
5194 // x != 0 => vmsknez.b x
5195 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5197 break;
5198 }
5199
5200 if (Opc == ISD::DELETED_NODE)
5201 return SDValue();
5202
5203 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5205 V = DAG.getZExtOrTrunc(V, DL, T);
5206 return DAG.getBitcast(VT, V);
5207}
5208
5211 const LoongArchSubtarget &Subtarget) {
5212 SDLoc DL(N);
5213 EVT VT = N->getValueType(0);
5214 SDValue Src = N->getOperand(0);
5215 EVT SrcVT = Src.getValueType();
5216 MVT GRLenVT = Subtarget.getGRLenVT();
5217
5218 if (!DCI.isBeforeLegalizeOps())
5219 return SDValue();
5220
5221 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5222 return SDValue();
5223
5224 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5225 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5226 if (Res)
5227 return Res;
5228
5229 // Generate vXi1 using [X]VMSKLTZ
5230 MVT SExtVT;
5231 unsigned Opc;
5232 bool UseLASX = false;
5233 bool PropagateSExt = false;
5234
5235 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5236 EVT CmpVT = Src.getOperand(0).getValueType();
5237 if (CmpVT.getSizeInBits() > 256)
5238 return SDValue();
5239 }
5240
5241 switch (SrcVT.getSimpleVT().SimpleTy) {
5242 default:
5243 return SDValue();
5244 case MVT::v2i1:
5245 SExtVT = MVT::v2i64;
5246 break;
5247 case MVT::v4i1:
5248 SExtVT = MVT::v4i32;
5249 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5250 SExtVT = MVT::v4i64;
5251 UseLASX = true;
5252 PropagateSExt = true;
5253 }
5254 break;
5255 case MVT::v8i1:
5256 SExtVT = MVT::v8i16;
5257 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5258 SExtVT = MVT::v8i32;
5259 UseLASX = true;
5260 PropagateSExt = true;
5261 }
5262 break;
5263 case MVT::v16i1:
5264 SExtVT = MVT::v16i8;
5265 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5266 SExtVT = MVT::v16i16;
5267 UseLASX = true;
5268 PropagateSExt = true;
5269 }
5270 break;
5271 case MVT::v32i1:
5272 SExtVT = MVT::v32i8;
5273 UseLASX = true;
5274 break;
5275 };
5276 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5277 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5278
5279 SDValue V;
5280 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5281 if (Src.getSimpleValueType() == MVT::v32i8) {
5282 SDValue Lo, Hi;
5283 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5284 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5285 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5286 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5287 DAG.getConstant(16, DL, MVT::i8));
5288 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5289 } else if (UseLASX) {
5290 return SDValue();
5291 }
5292 }
5293
5294 if (!V) {
5296 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5297 }
5298
5300 V = DAG.getZExtOrTrunc(V, DL, T);
5301 return DAG.getBitcast(VT, V);
5302}
5303
5306 const LoongArchSubtarget &Subtarget) {
5307 MVT GRLenVT = Subtarget.getGRLenVT();
5308 EVT ValTy = N->getValueType(0);
5309 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5310 ConstantSDNode *CN0, *CN1;
5311 SDLoc DL(N);
5312 unsigned ValBits = ValTy.getSizeInBits();
5313 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5314 unsigned Shamt;
5315 bool SwapAndRetried = false;
5316
5317 // BSTRPICK requires the 32S feature.
5318 if (!Subtarget.has32S())
5319 return SDValue();
5320
5321 if (DCI.isBeforeLegalizeOps())
5322 return SDValue();
5323
5324 if (ValBits != 32 && ValBits != 64)
5325 return SDValue();
5326
5327Retry:
5328 // 1st pattern to match BSTRINS:
5329 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5330 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5331 // =>
5332 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
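  // e.g. (illustrative 32-bit values)
  // R = or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00)
  // => BSTRINS X, Y, 15, 8   (lsb = 8, size = 8, msb = 15)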
5333 if (N0.getOpcode() == ISD::AND &&
5334 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5335 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5336 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5337 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5338 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5339 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5340 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5341 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5342 (MaskIdx0 + MaskLen0 <= ValBits)) {
5343 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5344 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5345 N1.getOperand(0).getOperand(0),
5346 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5347 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5348 }
5349
5350 // 2nd pattern to match BSTRINS:
5351 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5352 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5353 // =>
5354 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
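  // e.g. (illustrative 32-bit values)
  // R = or (and X, 0xffff00ff), (shl (and Y, 0xff), 8)
  // => BSTRINS X, Y, 15, 8   (lsb = 8, size = 8, msb = 15)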
5355 if (N0.getOpcode() == ISD::AND &&
5356 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5357 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5358 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5359 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5360 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5361 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5362 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5363 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5364 (MaskIdx0 + MaskLen0 <= ValBits)) {
5365 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5366 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5367 N1.getOperand(0).getOperand(0),
5368 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5369 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5370 }
5371
5372 // 3rd pattern to match BSTRINS:
5373 // R = or (and X, mask0), (and Y, mask1)
5374 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5375 // =>
5376 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5377 // where msb = lsb + size - 1
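  // e.g. (illustrative 32-bit values)
  // R = or (and X, 0xffff00ff), (and Y, 0x3300)
  // => BSTRINS X, (srl (and Y, 0x3300), 8), 15, 8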
5378 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5379 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5380 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5381 (MaskIdx0 + MaskLen0 <= 64) &&
5382 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5383 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5384 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5385 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5386 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5387 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5388 DAG.getConstant(ValBits == 32
5389 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5390 : (MaskIdx0 + MaskLen0 - 1),
5391 DL, GRLenVT),
5392 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5393 }
5394
5395 // 4th pattern to match BSTRINS:
5396 // R = or (and X, mask), (shl Y, shamt)
5397 // where mask = (2**shamt - 1)
5398 // =>
5399 // R = BSTRINS X, Y, ValBits - 1, shamt
5400 // where ValBits = 32 or 64
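  // e.g. (illustrative 32-bit values)
  // R = or (and X, 0xff), (shl Y, 8)
  // => BSTRINS X, Y, 31, 8   (mask = 2**8 - 1, shamt = 8)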
5401 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5402 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5403 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5404 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5405 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5406 (MaskIdx0 + MaskLen0 <= ValBits)) {
5407 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5408 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5409 N1.getOperand(0),
5410 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5411 DAG.getConstant(Shamt, DL, GRLenVT));
5412 }
5413
5414 // 5th pattern to match BSTRINS:
5415 // R = or (and X, mask), const
5416 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5417 // =>
5418 // R = BSTRINS X, (const >> lsb), msb, lsb
5419 // where msb = lsb + size - 1
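  // e.g. (illustrative 32-bit values)
  // R = or (and X, 0xffff00ff), 0x2a00
  // => BSTRINS X, 0x2a, 15, 8   (const >> lsb = 0x2a)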
5420 if (N0.getOpcode() == ISD::AND &&
5421 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5422 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5423 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5424 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5425 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5426 return DAG.getNode(
5427 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5428 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5429 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5430 : (MaskIdx0 + MaskLen0 - 1),
5431 DL, GRLenVT),
5432 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5433 }
5434
5435 // 6th pattern.
5436 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5437 // by the incoming bits are known to be zero.
5438 // =>
5439 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5440 //
5441   // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5442 // pattern is more common than the 1st. So we put the 1st before the 6th in
5443 // order to match as many nodes as possible.
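  // e.g. (illustrative values) a = b | ((c & 0xff) << 8), with bits [15:8] of b
  // known to be zero => BSTRINS b, c, 15, 8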
5444 ConstantSDNode *CNMask, *CNShamt;
5445 unsigned MaskIdx, MaskLen;
5446 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5447 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5448 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5449 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5450 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5451 Shamt = CNShamt->getZExtValue();
5452 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5453 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5454 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5455 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5456 N1.getOperand(0).getOperand(0),
5457 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5458 DAG.getConstant(Shamt, DL, GRLenVT));
5459 }
5460 }
5461
5462 // 7th pattern.
5463 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5464 // overwritten by the incoming bits are known to be zero.
5465 // =>
5466 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5467 //
5468 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5469 // before the 7th in order to match as many nodes as possible.
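  // e.g. (illustrative values) a = b | ((c << 8) & 0xff00), with bits [15:8] of
  // b known to be zero => BSTRINS b, c, 15, 8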
5470 if (N1.getOpcode() == ISD::AND &&
5471 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5472 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5473 N1.getOperand(0).getOpcode() == ISD::SHL &&
5474 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5475 CNShamt->getZExtValue() == MaskIdx) {
5476 APInt ShMask(ValBits, CNMask->getZExtValue());
5477 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5478 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5479 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5480 N1.getOperand(0).getOperand(0),
5481 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5482 DAG.getConstant(MaskIdx, DL, GRLenVT));
5483 }
5484 }
5485
5486 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5487 if (!SwapAndRetried) {
5488 std::swap(N0, N1);
5489 SwapAndRetried = true;
5490 goto Retry;
5491 }
5492
5493 SwapAndRetried = false;
5494Retry2:
5495 // 8th pattern.
5496 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5497 // the incoming bits are known to be zero.
5498 // =>
5499 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5500 //
5501 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5502   // we put it here in order to match as many nodes as possible or generate fewer
5503   // instructions.
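  // e.g. (illustrative values) a = b | (c & 0xff00), with bits [15:8] of b known
  // to be zero => BSTRINS b, (c >> 8), 15, 8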
5504 if (N1.getOpcode() == ISD::AND &&
5505 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5506 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5507 APInt ShMask(ValBits, CNMask->getZExtValue());
5508 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5509 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5510 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5511 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5512 N1->getOperand(0),
5513 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5514 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5515 DAG.getConstant(MaskIdx, DL, GRLenVT));
5516 }
5517 }
5518 // Swap N0/N1 and retry.
5519 if (!SwapAndRetried) {
5520 std::swap(N0, N1);
5521 SwapAndRetried = true;
5522 goto Retry2;
5523 }
5524
5525 return SDValue();
5526}
5527
5528static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5529 ExtType = ISD::NON_EXTLOAD;
5530
5531 switch (V.getNode()->getOpcode()) {
5532 case ISD::LOAD: {
5533 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5534 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5535 (LoadNode->getMemoryVT() == MVT::i16)) {
5536 ExtType = LoadNode->getExtensionType();
5537 return true;
5538 }
5539 return false;
5540 }
5541 case ISD::AssertSext: {
5542 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5543 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5544 ExtType = ISD::SEXTLOAD;
5545 return true;
5546 }
5547 return false;
5548 }
5549 case ISD::AssertZext: {
5550 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5551 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5552 ExtType = ISD::ZEXTLOAD;
5553 return true;
5554 }
5555 return false;
5556 }
5557 default:
5558 return false;
5559 }
5560
5561 return false;
5562}
5563
5564// Eliminate redundant truncation and zero-extension nodes.
5565// * Case 1:
5566// +------------+ +------------+ +------------+
5567// | Input1 | | Input2 | | CC |
5568// +------------+ +------------+ +------------+
5569// | | |
5570// V V +----+
5571// +------------+ +------------+ |
5572// | TRUNCATE | | TRUNCATE | |
5573// +------------+ +------------+ |
5574// | | |
5575// V V |
5576// +------------+ +------------+ |
5577// | ZERO_EXT | | ZERO_EXT | |
5578// +------------+ +------------+ |
5579// | | |
5580// | +-------------+ |
5581// V V | |
5582// +----------------+ | |
5583// | AND | | |
5584// +----------------+ | |
5585// | | |
5586// +---------------+ | |
5587// | | |
5588// V V V
5589// +-------------+
5590// | CMP |
5591// +-------------+
5592// * Case 2:
5593// +------------+ +------------+ +-------------+ +------------+ +------------+
5594// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5595// +------------+ +------------+ +-------------+ +------------+ +------------+
5596// | | | | |
5597// V | | | |
5598// +------------+ | | | |
5599// | XOR |<---------------------+ | |
5600// +------------+ | | |
5601// | | | |
5602// V V +---------------+ |
5603// +------------+ +------------+ | |
5604// | TRUNCATE | | TRUNCATE | | +-------------------------+
5605// +------------+ +------------+ | |
5606// | | | |
5607// V V | |
5608// +------------+ +------------+ | |
5609// | ZERO_EXT | | ZERO_EXT | | |
5610// +------------+ +------------+ | |
5611// | | | |
5612// V V | |
5613// +----------------+ | |
5614// | AND | | |
5615// +----------------+ | |
5616// | | |
5617// +---------------+ | |
5618// | | |
5619// V V V
5620// +-------------+
5621// | CMP |
5622// +-------------+
5625 const LoongArchSubtarget &Subtarget) {
5626 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5627
5628 SDNode *AndNode = N->getOperand(0).getNode();
5629 if (AndNode->getOpcode() != ISD::AND)
5630 return SDValue();
5631
5632 SDValue AndInputValue2 = AndNode->getOperand(1);
5633 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5634 return SDValue();
5635
5636 SDValue CmpInputValue = N->getOperand(1);
5637 SDValue AndInputValue1 = AndNode->getOperand(0);
5638 if (AndInputValue1.getOpcode() == ISD::XOR) {
5639 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5640 return SDValue();
5641 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5642 if (!CN || CN->getSExtValue() != -1)
5643 return SDValue();
5644 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5645 if (!CN || CN->getSExtValue() != 0)
5646 return SDValue();
5647 AndInputValue1 = AndInputValue1.getOperand(0);
5648 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5649 return SDValue();
5650 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5651 if (AndInputValue2 != CmpInputValue)
5652 return SDValue();
5653 } else {
5654 return SDValue();
5655 }
5656
5657 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5658 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5659 return SDValue();
5660
5661 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5662 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5663 return SDValue();
5664
5665 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5666 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5667 ISD::LoadExtType ExtType1;
5668 ISD::LoadExtType ExtType2;
5669
5670 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5671 !checkValueWidth(TruncInputValue2, ExtType2))
5672 return SDValue();
5673
5674 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5675 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5676 return SDValue();
5677
5678 if ((ExtType2 != ISD::ZEXTLOAD) &&
5679 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5680 return SDValue();
5681
5682   // These truncation and zero-extension nodes are not necessary; remove them.
5683 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5684 TruncInputValue1, TruncInputValue2);
5685 SDValue NewSetCC =
5686 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5687 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5688 return SDValue(N, 0);
5689}
5690
5691// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5694 const LoongArchSubtarget &Subtarget) {
5695 if (DCI.isBeforeLegalizeOps())
5696 return SDValue();
5697
5698 SDValue Src = N->getOperand(0);
5699 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5700 return SDValue();
5701
5702 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5703 Src.getOperand(0));
5704}
5705
5706// Perform common combines for BR_CC and SELECT_CC conditions.
5707static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5708 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5709 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5710
5711   // Since an arithmetic right shift always preserves the sign,
5712   // the shift can be omitted.
5713 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5714 // setge (sra X, N), 0 -> setge X, 0
5715 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5716 LHS.getOpcode() == ISD::SRA) {
5717 LHS = LHS.getOperand(0);
5718 return true;
5719 }
5720
5721 if (!ISD::isIntEqualitySetCC(CCVal))
5722 return false;
5723
5724 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5725 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5726 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5727 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5728 // If we're looking for eq 0 instead of ne 0, we need to invert the
5729 // condition.
5730 bool Invert = CCVal == ISD::SETEQ;
5731 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5732 if (Invert)
5733 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5734
5735 RHS = LHS.getOperand(1);
5736 LHS = LHS.getOperand(0);
5737 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5738
5739 CC = DAG.getCondCode(CCVal);
5740 return true;
5741 }
5742
5743 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
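  // e.g. (illustrative, C = 4) ((X & 0x10) >> 4) == 0 -> (X << (GRLen - 5)) >= 0,
  // since the shift moves bit 4 of X into the sign position.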
5744 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5745 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5746 SDValue LHS0 = LHS.getOperand(0);
5747 if (LHS0.getOpcode() == ISD::AND &&
5748 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5749 uint64_t Mask = LHS0.getConstantOperandVal(1);
5750 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5751 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5752 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5753 CC = DAG.getCondCode(CCVal);
5754
5755 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5756 LHS = LHS0.getOperand(0);
5757 if (ShAmt != 0)
5758 LHS =
5759 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5760 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5761 return true;
5762 }
5763 }
5764 }
5765
5766 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5767 // This can occur when legalizing some floating point comparisons.
5768 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5769 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5770 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5771 CC = DAG.getCondCode(CCVal);
5772 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5773 return true;
5774 }
5775
5776 return false;
5777}
5778
5781 const LoongArchSubtarget &Subtarget) {
5782 SDValue LHS = N->getOperand(1);
5783 SDValue RHS = N->getOperand(2);
5784 SDValue CC = N->getOperand(3);
5785 SDLoc DL(N);
5786
5787 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5788 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5789 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5790
5791 return SDValue();
5792}
5793
5796 const LoongArchSubtarget &Subtarget) {
5797   // Transform select_cc into simpler or branchless forms when possible.
5798 SDValue LHS = N->getOperand(0);
5799 SDValue RHS = N->getOperand(1);
5800 SDValue CC = N->getOperand(2);
5801 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5802 SDValue TrueV = N->getOperand(3);
5803 SDValue FalseV = N->getOperand(4);
5804 SDLoc DL(N);
5805 EVT VT = N->getValueType(0);
5806
5807 // If the True and False values are the same, we don't need a select_cc.
5808 if (TrueV == FalseV)
5809 return TrueV;
5810
5811 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5812 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
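  // This works because (x >> (GRLEN - 1)) is all-ones when x < 0 and zero
  // otherwise, so the AND keeps either the full difference or nothing and the
  // ADD then yields y or z without a branch.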
5813 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5815 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5816 if (CCVal == ISD::CondCode::SETGE)
5817 std::swap(TrueV, FalseV);
5818
5819 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5820 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5821     // Only handle simm12; if the value is not in this range, it can be treated
5822     // as a register operand.
5823 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5824 isInt<12>(TrueSImm - FalseSImm)) {
5825 SDValue SRA =
5826 DAG.getNode(ISD::SRA, DL, VT, LHS,
5827 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5828 SDValue AND =
5829 DAG.getNode(ISD::AND, DL, VT, SRA,
5830 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5831 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5832 }
5833
5834 if (CCVal == ISD::CondCode::SETGE)
5835 std::swap(TrueV, FalseV);
5836 }
5837
5838 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5839 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5840 {LHS, RHS, CC, TrueV, FalseV});
5841
5842 return SDValue();
5843}
5844
5845template <unsigned N>
5847 SelectionDAG &DAG,
5848 const LoongArchSubtarget &Subtarget,
5849 bool IsSigned = false) {
5850 SDLoc DL(Node);
5851 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5852 // Check the ImmArg.
5853 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5854 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5855 DAG.getContext()->emitError(Node->getOperationName(0) +
5856 ": argument out of range.");
5857 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5858 }
5859 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5860}
5861
5862template <unsigned N>
5863static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5864 SelectionDAG &DAG, bool IsSigned = false) {
5865 SDLoc DL(Node);
5866 EVT ResTy = Node->getValueType(0);
5867 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5868
5869 // Check the ImmArg.
5870 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5871 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5872 DAG.getContext()->emitError(Node->getOperationName(0) +
5873 ": argument out of range.");
5874 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5875 }
5876 return DAG.getConstant(
5878 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5879 DL, ResTy);
5880}
5881
5883 SDLoc DL(Node);
5884 EVT ResTy = Node->getValueType(0);
5885 SDValue Vec = Node->getOperand(2);
5886 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5887 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5888}
5889
5891 SDLoc DL(Node);
5892 EVT ResTy = Node->getValueType(0);
5893 SDValue One = DAG.getConstant(1, DL, ResTy);
5894 SDValue Bit =
5895 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5896
5897 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5898 DAG.getNOT(DL, Bit, ResTy));
5899}
5900
5901template <unsigned N>
5903 SDLoc DL(Node);
5904 EVT ResTy = Node->getValueType(0);
5905 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5906 // Check the unsigned ImmArg.
5907 if (!isUInt<N>(CImm->getZExtValue())) {
5908 DAG.getContext()->emitError(Node->getOperationName(0) +
5909 ": argument out of range.");
5910 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5911 }
5912
5913 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5914 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5915
5916 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5917}
5918
5919template <unsigned N>
5921 SDLoc DL(Node);
5922 EVT ResTy = Node->getValueType(0);
5923 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5924 // Check the unsigned ImmArg.
5925 if (!isUInt<N>(CImm->getZExtValue())) {
5926 DAG.getContext()->emitError(Node->getOperationName(0) +
5927 ": argument out of range.");
5928 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5929 }
5930
5931 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5932 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5933 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5934}
5935
5936template <unsigned N>
5938 SDLoc DL(Node);
5939 EVT ResTy = Node->getValueType(0);
5940 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5941 // Check the unsigned ImmArg.
5942 if (!isUInt<N>(CImm->getZExtValue())) {
5943 DAG.getContext()->emitError(Node->getOperationName(0) +
5944 ": argument out of range.");
5945 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5946 }
5947
5948 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5949 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5950 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5951}
5952
5953template <unsigned W>
5955 unsigned ResOp) {
5956 unsigned Imm = N->getConstantOperandVal(2);
5957 if (!isUInt<W>(Imm)) {
5958 const StringRef ErrorMsg = "argument out of range";
5959 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
5960 return DAG.getUNDEF(N->getValueType(0));
5961 }
5962 SDLoc DL(N);
5963 SDValue Vec = N->getOperand(1);
5964 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
5966 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
5967}
5968
5969static SDValue
5972 const LoongArchSubtarget &Subtarget) {
5973 SDLoc DL(N);
5974 switch (N->getConstantOperandVal(0)) {
5975 default:
5976 break;
5977 case Intrinsic::loongarch_lsx_vadd_b:
5978 case Intrinsic::loongarch_lsx_vadd_h:
5979 case Intrinsic::loongarch_lsx_vadd_w:
5980 case Intrinsic::loongarch_lsx_vadd_d:
5981 case Intrinsic::loongarch_lasx_xvadd_b:
5982 case Intrinsic::loongarch_lasx_xvadd_h:
5983 case Intrinsic::loongarch_lasx_xvadd_w:
5984 case Intrinsic::loongarch_lasx_xvadd_d:
5985 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5986 N->getOperand(2));
5987 case Intrinsic::loongarch_lsx_vaddi_bu:
5988 case Intrinsic::loongarch_lsx_vaddi_hu:
5989 case Intrinsic::loongarch_lsx_vaddi_wu:
5990 case Intrinsic::loongarch_lsx_vaddi_du:
5991 case Intrinsic::loongarch_lasx_xvaddi_bu:
5992 case Intrinsic::loongarch_lasx_xvaddi_hu:
5993 case Intrinsic::loongarch_lasx_xvaddi_wu:
5994 case Intrinsic::loongarch_lasx_xvaddi_du:
5995 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5996 lowerVectorSplatImm<5>(N, 2, DAG));
5997 case Intrinsic::loongarch_lsx_vsub_b:
5998 case Intrinsic::loongarch_lsx_vsub_h:
5999 case Intrinsic::loongarch_lsx_vsub_w:
6000 case Intrinsic::loongarch_lsx_vsub_d:
6001 case Intrinsic::loongarch_lasx_xvsub_b:
6002 case Intrinsic::loongarch_lasx_xvsub_h:
6003 case Intrinsic::loongarch_lasx_xvsub_w:
6004 case Intrinsic::loongarch_lasx_xvsub_d:
6005 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6006 N->getOperand(2));
6007 case Intrinsic::loongarch_lsx_vsubi_bu:
6008 case Intrinsic::loongarch_lsx_vsubi_hu:
6009 case Intrinsic::loongarch_lsx_vsubi_wu:
6010 case Intrinsic::loongarch_lsx_vsubi_du:
6011 case Intrinsic::loongarch_lasx_xvsubi_bu:
6012 case Intrinsic::loongarch_lasx_xvsubi_hu:
6013 case Intrinsic::loongarch_lasx_xvsubi_wu:
6014 case Intrinsic::loongarch_lasx_xvsubi_du:
6015 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6016 lowerVectorSplatImm<5>(N, 2, DAG));
6017 case Intrinsic::loongarch_lsx_vneg_b:
6018 case Intrinsic::loongarch_lsx_vneg_h:
6019 case Intrinsic::loongarch_lsx_vneg_w:
6020 case Intrinsic::loongarch_lsx_vneg_d:
6021 case Intrinsic::loongarch_lasx_xvneg_b:
6022 case Intrinsic::loongarch_lasx_xvneg_h:
6023 case Intrinsic::loongarch_lasx_xvneg_w:
6024 case Intrinsic::loongarch_lasx_xvneg_d:
6025 return DAG.getNode(
6026 ISD::SUB, DL, N->getValueType(0),
6027 DAG.getConstant(
6028 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6029 /*isSigned=*/true),
6030 SDLoc(N), N->getValueType(0)),
6031 N->getOperand(1));
6032 case Intrinsic::loongarch_lsx_vmax_b:
6033 case Intrinsic::loongarch_lsx_vmax_h:
6034 case Intrinsic::loongarch_lsx_vmax_w:
6035 case Intrinsic::loongarch_lsx_vmax_d:
6036 case Intrinsic::loongarch_lasx_xvmax_b:
6037 case Intrinsic::loongarch_lasx_xvmax_h:
6038 case Intrinsic::loongarch_lasx_xvmax_w:
6039 case Intrinsic::loongarch_lasx_xvmax_d:
6040 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6041 N->getOperand(2));
6042 case Intrinsic::loongarch_lsx_vmax_bu:
6043 case Intrinsic::loongarch_lsx_vmax_hu:
6044 case Intrinsic::loongarch_lsx_vmax_wu:
6045 case Intrinsic::loongarch_lsx_vmax_du:
6046 case Intrinsic::loongarch_lasx_xvmax_bu:
6047 case Intrinsic::loongarch_lasx_xvmax_hu:
6048 case Intrinsic::loongarch_lasx_xvmax_wu:
6049 case Intrinsic::loongarch_lasx_xvmax_du:
6050 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6051 N->getOperand(2));
6052 case Intrinsic::loongarch_lsx_vmaxi_b:
6053 case Intrinsic::loongarch_lsx_vmaxi_h:
6054 case Intrinsic::loongarch_lsx_vmaxi_w:
6055 case Intrinsic::loongarch_lsx_vmaxi_d:
6056 case Intrinsic::loongarch_lasx_xvmaxi_b:
6057 case Intrinsic::loongarch_lasx_xvmaxi_h:
6058 case Intrinsic::loongarch_lasx_xvmaxi_w:
6059 case Intrinsic::loongarch_lasx_xvmaxi_d:
6060 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6061 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6062 case Intrinsic::loongarch_lsx_vmaxi_bu:
6063 case Intrinsic::loongarch_lsx_vmaxi_hu:
6064 case Intrinsic::loongarch_lsx_vmaxi_wu:
6065 case Intrinsic::loongarch_lsx_vmaxi_du:
6066 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6067 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6068 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6069 case Intrinsic::loongarch_lasx_xvmaxi_du:
6070 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6071 lowerVectorSplatImm<5>(N, 2, DAG));
6072 case Intrinsic::loongarch_lsx_vmin_b:
6073 case Intrinsic::loongarch_lsx_vmin_h:
6074 case Intrinsic::loongarch_lsx_vmin_w:
6075 case Intrinsic::loongarch_lsx_vmin_d:
6076 case Intrinsic::loongarch_lasx_xvmin_b:
6077 case Intrinsic::loongarch_lasx_xvmin_h:
6078 case Intrinsic::loongarch_lasx_xvmin_w:
6079 case Intrinsic::loongarch_lasx_xvmin_d:
6080 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6081 N->getOperand(2));
6082 case Intrinsic::loongarch_lsx_vmin_bu:
6083 case Intrinsic::loongarch_lsx_vmin_hu:
6084 case Intrinsic::loongarch_lsx_vmin_wu:
6085 case Intrinsic::loongarch_lsx_vmin_du:
6086 case Intrinsic::loongarch_lasx_xvmin_bu:
6087 case Intrinsic::loongarch_lasx_xvmin_hu:
6088 case Intrinsic::loongarch_lasx_xvmin_wu:
6089 case Intrinsic::loongarch_lasx_xvmin_du:
6090 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6091 N->getOperand(2));
6092 case Intrinsic::loongarch_lsx_vmini_b:
6093 case Intrinsic::loongarch_lsx_vmini_h:
6094 case Intrinsic::loongarch_lsx_vmini_w:
6095 case Intrinsic::loongarch_lsx_vmini_d:
6096 case Intrinsic::loongarch_lasx_xvmini_b:
6097 case Intrinsic::loongarch_lasx_xvmini_h:
6098 case Intrinsic::loongarch_lasx_xvmini_w:
6099 case Intrinsic::loongarch_lasx_xvmini_d:
6100 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6101 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6102 case Intrinsic::loongarch_lsx_vmini_bu:
6103 case Intrinsic::loongarch_lsx_vmini_hu:
6104 case Intrinsic::loongarch_lsx_vmini_wu:
6105 case Intrinsic::loongarch_lsx_vmini_du:
6106 case Intrinsic::loongarch_lasx_xvmini_bu:
6107 case Intrinsic::loongarch_lasx_xvmini_hu:
6108 case Intrinsic::loongarch_lasx_xvmini_wu:
6109 case Intrinsic::loongarch_lasx_xvmini_du:
6110 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6111 lowerVectorSplatImm<5>(N, 2, DAG));
6112 case Intrinsic::loongarch_lsx_vmul_b:
6113 case Intrinsic::loongarch_lsx_vmul_h:
6114 case Intrinsic::loongarch_lsx_vmul_w:
6115 case Intrinsic::loongarch_lsx_vmul_d:
6116 case Intrinsic::loongarch_lasx_xvmul_b:
6117 case Intrinsic::loongarch_lasx_xvmul_h:
6118 case Intrinsic::loongarch_lasx_xvmul_w:
6119 case Intrinsic::loongarch_lasx_xvmul_d:
6120 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6121 N->getOperand(2));
6122 case Intrinsic::loongarch_lsx_vmadd_b:
6123 case Intrinsic::loongarch_lsx_vmadd_h:
6124 case Intrinsic::loongarch_lsx_vmadd_w:
6125 case Intrinsic::loongarch_lsx_vmadd_d:
6126 case Intrinsic::loongarch_lasx_xvmadd_b:
6127 case Intrinsic::loongarch_lasx_xvmadd_h:
6128 case Intrinsic::loongarch_lasx_xvmadd_w:
6129 case Intrinsic::loongarch_lasx_xvmadd_d: {
6130 EVT ResTy = N->getValueType(0);
6131 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6132 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6133 N->getOperand(3)));
6134 }
6135 case Intrinsic::loongarch_lsx_vmsub_b:
6136 case Intrinsic::loongarch_lsx_vmsub_h:
6137 case Intrinsic::loongarch_lsx_vmsub_w:
6138 case Intrinsic::loongarch_lsx_vmsub_d:
6139 case Intrinsic::loongarch_lasx_xvmsub_b:
6140 case Intrinsic::loongarch_lasx_xvmsub_h:
6141 case Intrinsic::loongarch_lasx_xvmsub_w:
6142 case Intrinsic::loongarch_lasx_xvmsub_d: {
6143 EVT ResTy = N->getValueType(0);
6144 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6145 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6146 N->getOperand(3)));
6147 }
6148 case Intrinsic::loongarch_lsx_vdiv_b:
6149 case Intrinsic::loongarch_lsx_vdiv_h:
6150 case Intrinsic::loongarch_lsx_vdiv_w:
6151 case Intrinsic::loongarch_lsx_vdiv_d:
6152 case Intrinsic::loongarch_lasx_xvdiv_b:
6153 case Intrinsic::loongarch_lasx_xvdiv_h:
6154 case Intrinsic::loongarch_lasx_xvdiv_w:
6155 case Intrinsic::loongarch_lasx_xvdiv_d:
6156 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6157 N->getOperand(2));
6158 case Intrinsic::loongarch_lsx_vdiv_bu:
6159 case Intrinsic::loongarch_lsx_vdiv_hu:
6160 case Intrinsic::loongarch_lsx_vdiv_wu:
6161 case Intrinsic::loongarch_lsx_vdiv_du:
6162 case Intrinsic::loongarch_lasx_xvdiv_bu:
6163 case Intrinsic::loongarch_lasx_xvdiv_hu:
6164 case Intrinsic::loongarch_lasx_xvdiv_wu:
6165 case Intrinsic::loongarch_lasx_xvdiv_du:
6166 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6167 N->getOperand(2));
6168 case Intrinsic::loongarch_lsx_vmod_b:
6169 case Intrinsic::loongarch_lsx_vmod_h:
6170 case Intrinsic::loongarch_lsx_vmod_w:
6171 case Intrinsic::loongarch_lsx_vmod_d:
6172 case Intrinsic::loongarch_lasx_xvmod_b:
6173 case Intrinsic::loongarch_lasx_xvmod_h:
6174 case Intrinsic::loongarch_lasx_xvmod_w:
6175 case Intrinsic::loongarch_lasx_xvmod_d:
6176 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6177 N->getOperand(2));
6178 case Intrinsic::loongarch_lsx_vmod_bu:
6179 case Intrinsic::loongarch_lsx_vmod_hu:
6180 case Intrinsic::loongarch_lsx_vmod_wu:
6181 case Intrinsic::loongarch_lsx_vmod_du:
6182 case Intrinsic::loongarch_lasx_xvmod_bu:
6183 case Intrinsic::loongarch_lasx_xvmod_hu:
6184 case Intrinsic::loongarch_lasx_xvmod_wu:
6185 case Intrinsic::loongarch_lasx_xvmod_du:
6186 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6187 N->getOperand(2));
6188 case Intrinsic::loongarch_lsx_vand_v:
6189 case Intrinsic::loongarch_lasx_xvand_v:
6190 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6191 N->getOperand(2));
6192 case Intrinsic::loongarch_lsx_vor_v:
6193 case Intrinsic::loongarch_lasx_xvor_v:
6194 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6195 N->getOperand(2));
6196 case Intrinsic::loongarch_lsx_vxor_v:
6197 case Intrinsic::loongarch_lasx_xvxor_v:
6198 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6199 N->getOperand(2));
6200 case Intrinsic::loongarch_lsx_vnor_v:
6201 case Intrinsic::loongarch_lasx_xvnor_v: {
6202 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6203 N->getOperand(2));
6204 return DAG.getNOT(DL, Res, Res->getValueType(0));
6205 }
6206 case Intrinsic::loongarch_lsx_vandi_b:
6207 case Intrinsic::loongarch_lasx_xvandi_b:
6208 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6209 lowerVectorSplatImm<8>(N, 2, DAG));
6210 case Intrinsic::loongarch_lsx_vori_b:
6211 case Intrinsic::loongarch_lasx_xvori_b:
6212 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6213 lowerVectorSplatImm<8>(N, 2, DAG));
6214 case Intrinsic::loongarch_lsx_vxori_b:
6215 case Intrinsic::loongarch_lasx_xvxori_b:
6216 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6217 lowerVectorSplatImm<8>(N, 2, DAG));
6218 case Intrinsic::loongarch_lsx_vsll_b:
6219 case Intrinsic::loongarch_lsx_vsll_h:
6220 case Intrinsic::loongarch_lsx_vsll_w:
6221 case Intrinsic::loongarch_lsx_vsll_d:
6222 case Intrinsic::loongarch_lasx_xvsll_b:
6223 case Intrinsic::loongarch_lasx_xvsll_h:
6224 case Intrinsic::loongarch_lasx_xvsll_w:
6225 case Intrinsic::loongarch_lasx_xvsll_d:
6226 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6227 truncateVecElts(N, DAG));
6228 case Intrinsic::loongarch_lsx_vslli_b:
6229 case Intrinsic::loongarch_lasx_xvslli_b:
6230 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6231 lowerVectorSplatImm<3>(N, 2, DAG));
6232 case Intrinsic::loongarch_lsx_vslli_h:
6233 case Intrinsic::loongarch_lasx_xvslli_h:
6234 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6235 lowerVectorSplatImm<4>(N, 2, DAG));
6236 case Intrinsic::loongarch_lsx_vslli_w:
6237 case Intrinsic::loongarch_lasx_xvslli_w:
6238 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6239 lowerVectorSplatImm<5>(N, 2, DAG));
6240 case Intrinsic::loongarch_lsx_vslli_d:
6241 case Intrinsic::loongarch_lasx_xvslli_d:
6242 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6243 lowerVectorSplatImm<6>(N, 2, DAG));
6244 case Intrinsic::loongarch_lsx_vsrl_b:
6245 case Intrinsic::loongarch_lsx_vsrl_h:
6246 case Intrinsic::loongarch_lsx_vsrl_w:
6247 case Intrinsic::loongarch_lsx_vsrl_d:
6248 case Intrinsic::loongarch_lasx_xvsrl_b:
6249 case Intrinsic::loongarch_lasx_xvsrl_h:
6250 case Intrinsic::loongarch_lasx_xvsrl_w:
6251 case Intrinsic::loongarch_lasx_xvsrl_d:
6252 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6253 truncateVecElts(N, DAG));
6254 case Intrinsic::loongarch_lsx_vsrli_b:
6255 case Intrinsic::loongarch_lasx_xvsrli_b:
6256 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6257 lowerVectorSplatImm<3>(N, 2, DAG));
6258 case Intrinsic::loongarch_lsx_vsrli_h:
6259 case Intrinsic::loongarch_lasx_xvsrli_h:
6260 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6261 lowerVectorSplatImm<4>(N, 2, DAG));
6262 case Intrinsic::loongarch_lsx_vsrli_w:
6263 case Intrinsic::loongarch_lasx_xvsrli_w:
6264 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6265 lowerVectorSplatImm<5>(N, 2, DAG));
6266 case Intrinsic::loongarch_lsx_vsrli_d:
6267 case Intrinsic::loongarch_lasx_xvsrli_d:
6268 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6269 lowerVectorSplatImm<6>(N, 2, DAG));
6270 case Intrinsic::loongarch_lsx_vsra_b:
6271 case Intrinsic::loongarch_lsx_vsra_h:
6272 case Intrinsic::loongarch_lsx_vsra_w:
6273 case Intrinsic::loongarch_lsx_vsra_d:
6274 case Intrinsic::loongarch_lasx_xvsra_b:
6275 case Intrinsic::loongarch_lasx_xvsra_h:
6276 case Intrinsic::loongarch_lasx_xvsra_w:
6277 case Intrinsic::loongarch_lasx_xvsra_d:
6278 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6279 truncateVecElts(N, DAG));
6280 case Intrinsic::loongarch_lsx_vsrai_b:
6281 case Intrinsic::loongarch_lasx_xvsrai_b:
6282 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6283 lowerVectorSplatImm<3>(N, 2, DAG));
6284 case Intrinsic::loongarch_lsx_vsrai_h:
6285 case Intrinsic::loongarch_lasx_xvsrai_h:
6286 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6287 lowerVectorSplatImm<4>(N, 2, DAG));
6288 case Intrinsic::loongarch_lsx_vsrai_w:
6289 case Intrinsic::loongarch_lasx_xvsrai_w:
6290 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6291 lowerVectorSplatImm<5>(N, 2, DAG));
6292 case Intrinsic::loongarch_lsx_vsrai_d:
6293 case Intrinsic::loongarch_lasx_xvsrai_d:
6294 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6295 lowerVectorSplatImm<6>(N, 2, DAG));
6296 case Intrinsic::loongarch_lsx_vclz_b:
6297 case Intrinsic::loongarch_lsx_vclz_h:
6298 case Intrinsic::loongarch_lsx_vclz_w:
6299 case Intrinsic::loongarch_lsx_vclz_d:
6300 case Intrinsic::loongarch_lasx_xvclz_b:
6301 case Intrinsic::loongarch_lasx_xvclz_h:
6302 case Intrinsic::loongarch_lasx_xvclz_w:
6303 case Intrinsic::loongarch_lasx_xvclz_d:
6304 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6305 case Intrinsic::loongarch_lsx_vpcnt_b:
6306 case Intrinsic::loongarch_lsx_vpcnt_h:
6307 case Intrinsic::loongarch_lsx_vpcnt_w:
6308 case Intrinsic::loongarch_lsx_vpcnt_d:
6309 case Intrinsic::loongarch_lasx_xvpcnt_b:
6310 case Intrinsic::loongarch_lasx_xvpcnt_h:
6311 case Intrinsic::loongarch_lasx_xvpcnt_w:
6312 case Intrinsic::loongarch_lasx_xvpcnt_d:
6313 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6314 case Intrinsic::loongarch_lsx_vbitclr_b:
6315 case Intrinsic::loongarch_lsx_vbitclr_h:
6316 case Intrinsic::loongarch_lsx_vbitclr_w:
6317 case Intrinsic::loongarch_lsx_vbitclr_d:
6318 case Intrinsic::loongarch_lasx_xvbitclr_b:
6319 case Intrinsic::loongarch_lasx_xvbitclr_h:
6320 case Intrinsic::loongarch_lasx_xvbitclr_w:
6321 case Intrinsic::loongarch_lasx_xvbitclr_d:
6322 return lowerVectorBitClear(N, DAG);
6323 case Intrinsic::loongarch_lsx_vbitclri_b:
6324 case Intrinsic::loongarch_lasx_xvbitclri_b:
6325 return lowerVectorBitClearImm<3>(N, DAG);
6326 case Intrinsic::loongarch_lsx_vbitclri_h:
6327 case Intrinsic::loongarch_lasx_xvbitclri_h:
6328 return lowerVectorBitClearImm<4>(N, DAG);
6329 case Intrinsic::loongarch_lsx_vbitclri_w:
6330 case Intrinsic::loongarch_lasx_xvbitclri_w:
6331 return lowerVectorBitClearImm<5>(N, DAG);
6332 case Intrinsic::loongarch_lsx_vbitclri_d:
6333 case Intrinsic::loongarch_lasx_xvbitclri_d:
6334 return lowerVectorBitClearImm<6>(N, DAG);
6335 case Intrinsic::loongarch_lsx_vbitset_b:
6336 case Intrinsic::loongarch_lsx_vbitset_h:
6337 case Intrinsic::loongarch_lsx_vbitset_w:
6338 case Intrinsic::loongarch_lsx_vbitset_d:
6339 case Intrinsic::loongarch_lasx_xvbitset_b:
6340 case Intrinsic::loongarch_lasx_xvbitset_h:
6341 case Intrinsic::loongarch_lasx_xvbitset_w:
6342 case Intrinsic::loongarch_lasx_xvbitset_d: {
6343 EVT VecTy = N->getValueType(0);
6344 SDValue One = DAG.getConstant(1, DL, VecTy);
6345 return DAG.getNode(
6346 ISD::OR, DL, VecTy, N->getOperand(1),
6347 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6348 }
6349 case Intrinsic::loongarch_lsx_vbitseti_b:
6350 case Intrinsic::loongarch_lasx_xvbitseti_b:
6351 return lowerVectorBitSetImm<3>(N, DAG);
6352 case Intrinsic::loongarch_lsx_vbitseti_h:
6353 case Intrinsic::loongarch_lasx_xvbitseti_h:
6354 return lowerVectorBitSetImm<4>(N, DAG);
6355 case Intrinsic::loongarch_lsx_vbitseti_w:
6356 case Intrinsic::loongarch_lasx_xvbitseti_w:
6357 return lowerVectorBitSetImm<5>(N, DAG);
6358 case Intrinsic::loongarch_lsx_vbitseti_d:
6359 case Intrinsic::loongarch_lasx_xvbitseti_d:
6360 return lowerVectorBitSetImm<6>(N, DAG);
6361 case Intrinsic::loongarch_lsx_vbitrev_b:
6362 case Intrinsic::loongarch_lsx_vbitrev_h:
6363 case Intrinsic::loongarch_lsx_vbitrev_w:
6364 case Intrinsic::loongarch_lsx_vbitrev_d:
6365 case Intrinsic::loongarch_lasx_xvbitrev_b:
6366 case Intrinsic::loongarch_lasx_xvbitrev_h:
6367 case Intrinsic::loongarch_lasx_xvbitrev_w:
6368 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6369 EVT VecTy = N->getValueType(0);
6370 SDValue One = DAG.getConstant(1, DL, VecTy);
6371 return DAG.getNode(
6372 ISD::XOR, DL, VecTy, N->getOperand(1),
6373 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6374 }
6375 case Intrinsic::loongarch_lsx_vbitrevi_b:
6376 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6377 return lowerVectorBitRevImm<3>(N, DAG);
6378 case Intrinsic::loongarch_lsx_vbitrevi_h:
6379 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6380 return lowerVectorBitRevImm<4>(N, DAG);
6381 case Intrinsic::loongarch_lsx_vbitrevi_w:
6382 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6383 return lowerVectorBitRevImm<5>(N, DAG);
6384 case Intrinsic::loongarch_lsx_vbitrevi_d:
6385 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6386 return lowerVectorBitRevImm<6>(N, DAG);
6387 case Intrinsic::loongarch_lsx_vfadd_s:
6388 case Intrinsic::loongarch_lsx_vfadd_d:
6389 case Intrinsic::loongarch_lasx_xvfadd_s:
6390 case Intrinsic::loongarch_lasx_xvfadd_d:
6391 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6392 N->getOperand(2));
6393 case Intrinsic::loongarch_lsx_vfsub_s:
6394 case Intrinsic::loongarch_lsx_vfsub_d:
6395 case Intrinsic::loongarch_lasx_xvfsub_s:
6396 case Intrinsic::loongarch_lasx_xvfsub_d:
6397 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6398 N->getOperand(2));
6399 case Intrinsic::loongarch_lsx_vfmul_s:
6400 case Intrinsic::loongarch_lsx_vfmul_d:
6401 case Intrinsic::loongarch_lasx_xvfmul_s:
6402 case Intrinsic::loongarch_lasx_xvfmul_d:
6403 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6404 N->getOperand(2));
6405 case Intrinsic::loongarch_lsx_vfdiv_s:
6406 case Intrinsic::loongarch_lsx_vfdiv_d:
6407 case Intrinsic::loongarch_lasx_xvfdiv_s:
6408 case Intrinsic::loongarch_lasx_xvfdiv_d:
6409 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6410 N->getOperand(2));
6411 case Intrinsic::loongarch_lsx_vfmadd_s:
6412 case Intrinsic::loongarch_lsx_vfmadd_d:
6413 case Intrinsic::loongarch_lasx_xvfmadd_s:
6414 case Intrinsic::loongarch_lasx_xvfmadd_d:
6415 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6416 N->getOperand(2), N->getOperand(3));
6417 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6418 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6419 N->getOperand(1), N->getOperand(2),
6420 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6421 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6422 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6423 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6424 N->getOperand(1), N->getOperand(2),
6425 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6426 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6427 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6428 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6429 N->getOperand(1), N->getOperand(2),
6430 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6431 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6432 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6433 N->getOperand(1), N->getOperand(2),
6434 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6435 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6436 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6437 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6438 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6439 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6440 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6441 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6442 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
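// vreplgr2vr broadcasts the GPR operand, any-extended to GRLen, into every
// element of the result vector.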
6443 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6444 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6445 N->getOperand(1)));
6446 case Intrinsic::loongarch_lsx_vreplve_b:
6447 case Intrinsic::loongarch_lsx_vreplve_h:
6448 case Intrinsic::loongarch_lsx_vreplve_w:
6449 case Intrinsic::loongarch_lsx_vreplve_d:
6450 case Intrinsic::loongarch_lasx_xvreplve_b:
6451 case Intrinsic::loongarch_lasx_xvreplve_h:
6452 case Intrinsic::loongarch_lasx_xvreplve_w:
6453 case Intrinsic::loongarch_lasx_xvreplve_d:
6454 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6455 N->getOperand(1),
6456 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6457 N->getOperand(2)));
6458 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6459 if (!Subtarget.is64Bit())
6461 break;
6462 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6463 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6464 if (!Subtarget.is64Bit())
6466 break;
6467 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6468 if (!Subtarget.is64Bit())
6470 break;
6471 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6472 if (!Subtarget.is64Bit())
6474 break;
6475 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6476 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6477 if (!Subtarget.is64Bit())
6479 break;
6480 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6481 if (!Subtarget.is64Bit())
6483 break;
6484 case Intrinsic::loongarch_lsx_bz_b:
6485 case Intrinsic::loongarch_lsx_bz_h:
6486 case Intrinsic::loongarch_lsx_bz_w:
6487 case Intrinsic::loongarch_lsx_bz_d:
6488 case Intrinsic::loongarch_lasx_xbz_b:
6489 case Intrinsic::loongarch_lasx_xbz_h:
6490 case Intrinsic::loongarch_lasx_xbz_w:
6491 case Intrinsic::loongarch_lasx_xbz_d:
6492 if (!Subtarget.is64Bit())
6493 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6494 N->getOperand(1));
6495 break;
6496 case Intrinsic::loongarch_lsx_bz_v:
6497 case Intrinsic::loongarch_lasx_xbz_v:
6498 if (!Subtarget.is64Bit())
6499 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6500 N->getOperand(1));
6501 break;
6502 case Intrinsic::loongarch_lsx_bnz_b:
6503 case Intrinsic::loongarch_lsx_bnz_h:
6504 case Intrinsic::loongarch_lsx_bnz_w:
6505 case Intrinsic::loongarch_lsx_bnz_d:
6506 case Intrinsic::loongarch_lasx_xbnz_b:
6507 case Intrinsic::loongarch_lasx_xbnz_h:
6508 case Intrinsic::loongarch_lasx_xbnz_w:
6509 case Intrinsic::loongarch_lasx_xbnz_d:
6510 if (!Subtarget.is64Bit())
6511 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6512 N->getOperand(1));
6513 break;
6514 case Intrinsic::loongarch_lsx_bnz_v:
6515 case Intrinsic::loongarch_lasx_xbnz_v:
6516 if (!Subtarget.is64Bit())
6517 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6518 N->getOperand(1));
6519 break;
6520 }
6521 return SDValue();
6522}
6523
6526 const LoongArchSubtarget &Subtarget) {
6527 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6528 // conversion is unnecessary and can be replaced with the
6529 // MOVFR2GR_S_LA64 operand.
6530 SDValue Op0 = N->getOperand(0);
6531 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6532 return Op0.getOperand(0);
6533 return SDValue();
6534}
6535
6538 const LoongArchSubtarget &Subtarget) {
6539 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6540 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6541 // operand.
6542 SDValue Op0 = N->getOperand(0);
6543 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6544 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6545 "Unexpected value type!");
6546 return Op0.getOperand(0);
6547 }
6548 return SDValue();
6549}
6550
6553 const LoongArchSubtarget &Subtarget) {
6554 MVT VT = N->getSimpleValueType(0);
6555 unsigned NumBits = VT.getScalarSizeInBits();
6556
6557 // Simplify the inputs.
6558 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6559 APInt DemandedMask(APInt::getAllOnes(NumBits));
6560 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6561 return SDValue(N, 0);
6562
6563 return SDValue();
6564}
6565
6566static SDValue
6569 const LoongArchSubtarget &Subtarget) {
6570 SDValue Op0 = N->getOperand(0);
6571 SDLoc DL(N);
6572
6573 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6574 // redundant. Instead, use BuildPairF64's operands directly.
6575 if (Op0.getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6576 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6577
6578 if (Op0->isUndef()) {
6579 SDValue Lo = DAG.getUNDEF(MVT::i32);
6580 SDValue Hi = DAG.getUNDEF(MVT::i32);
6581 return DCI.CombineTo(N, Lo, Hi);
6582 }
6583
6584 // It's cheaper to materialise two 32-bit integers than to load a double
6585 // from the constant pool and transfer it to integer registers through the
6586 // stack.
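// For example, the f64 constant 1.0 (bit pattern 0x3FF0000000000000) splits
// into Lo = 0x0 and Hi = 0x3FF00000, both of which are cheap to materialize
// in GPRs (the high word needs only a single lu12i.w).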
6587 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6588 APInt V = C->getValueAPF().bitcastToAPInt();
6589 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6590 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6591 return DCI.CombineTo(N, Lo, Hi);
6592 }
6593
6594 return SDValue();
6595}
6596
6597static SDValue
6600 const LoongArchSubtarget &Subtarget) {
6601 if (!DCI.isBeforeLegalize())
6602 return SDValue();
6603
6604 MVT EltVT = N->getSimpleValueType(0);
6605 SDValue Vec = N->getOperand(0);
6606 EVT VecTy = Vec->getValueType(0);
6607 SDValue Idx = N->getOperand(1);
6608 unsigned IdxOp = Idx.getOpcode();
6609 SDLoc DL(N);
6610
6611 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6612 return SDValue();
6613
6614 // Combine:
6615 // t2 = truncate t1
6616 // t3 = {zero/sign/any}_extend t2
6617 // t4 = extract_vector_elt t0, t3
6618 // to:
6619 // t4 = extract_vector_elt t0, t1
6620 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6621 IdxOp == ISD::ANY_EXTEND) {
6622 SDValue IdxOrig = Idx.getOperand(0);
6623 if (IdxOrig.getOpcode() != ISD::TRUNCATE)
6624 return SDValue();
6625
6626 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6627 IdxOrig.getOperand(0));
6628 }
6629
6630 return SDValue();
6631}
6632
6634 DAGCombinerInfo &DCI) const {
6635 SelectionDAG &DAG = DCI.DAG;
6636 switch (N->getOpcode()) {
6637 default:
6638 break;
6639 case ISD::AND:
6640 return performANDCombine(N, DAG, DCI, Subtarget);
6641 case ISD::OR:
6642 return performORCombine(N, DAG, DCI, Subtarget);
6643 case ISD::SETCC:
6644 return performSETCCCombine(N, DAG, DCI, Subtarget);
6645 case ISD::SRL:
6646 return performSRLCombine(N, DAG, DCI, Subtarget);
6647 case ISD::BITCAST:
6648 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6650 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6652 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6654 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6656 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6658 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6660 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6663 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6665 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6667 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6668 }
6669 return SDValue();
6670}
6671
6674 if (!ZeroDivCheck)
6675 return MBB;
6676
6677 // Build instructions:
6678 // MBB:
6679 // div(or mod) $dst, $dividend, $divisor
6680 // bne $divisor, $zero, SinkMBB
6681 // BreakMBB:
6682 // break 7 // BRK_DIVZERO
6683 // SinkMBB:
6684 // fallthrough
6685 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6686 MachineFunction::iterator It = ++MBB->getIterator();
6687 MachineFunction *MF = MBB->getParent();
6688 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6689 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6690 MF->insert(It, BreakMBB);
6691 MF->insert(It, SinkMBB);
6692
6693 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6694 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6695 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6696
6697 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6698 DebugLoc DL = MI.getDebugLoc();
6699 MachineOperand &Divisor = MI.getOperand(2);
6700 Register DivisorReg = Divisor.getReg();
6701
6702 // MBB:
6703 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6704 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6705 .addReg(LoongArch::R0)
6706 .addMBB(SinkMBB);
6707 MBB->addSuccessor(BreakMBB);
6708 MBB->addSuccessor(SinkMBB);
6709
6710 // BreakMBB:
6711 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6712 // definition of BRK_DIVZERO.
6713 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6714 BreakMBB->addSuccessor(SinkMBB);
6715
6716 // Clear Divisor's kill flag.
6717 Divisor.setIsKill(false);
6718
6719 return SinkMBB;
6720}
6721
6722static MachineBasicBlock *
6724 const LoongArchSubtarget &Subtarget) {
6725 unsigned CondOpc;
6726 switch (MI.getOpcode()) {
6727 default:
6728 llvm_unreachable("Unexpected opcode");
6729 case LoongArch::PseudoVBZ:
6730 CondOpc = LoongArch::VSETEQZ_V;
6731 break;
6732 case LoongArch::PseudoVBZ_B:
6733 CondOpc = LoongArch::VSETANYEQZ_B;
6734 break;
6735 case LoongArch::PseudoVBZ_H:
6736 CondOpc = LoongArch::VSETANYEQZ_H;
6737 break;
6738 case LoongArch::PseudoVBZ_W:
6739 CondOpc = LoongArch::VSETANYEQZ_W;
6740 break;
6741 case LoongArch::PseudoVBZ_D:
6742 CondOpc = LoongArch::VSETANYEQZ_D;
6743 break;
6744 case LoongArch::PseudoVBNZ:
6745 CondOpc = LoongArch::VSETNEZ_V;
6746 break;
6747 case LoongArch::PseudoVBNZ_B:
6748 CondOpc = LoongArch::VSETALLNEZ_B;
6749 break;
6750 case LoongArch::PseudoVBNZ_H:
6751 CondOpc = LoongArch::VSETALLNEZ_H;
6752 break;
6753 case LoongArch::PseudoVBNZ_W:
6754 CondOpc = LoongArch::VSETALLNEZ_W;
6755 break;
6756 case LoongArch::PseudoVBNZ_D:
6757 CondOpc = LoongArch::VSETALLNEZ_D;
6758 break;
6759 case LoongArch::PseudoXVBZ:
6760 CondOpc = LoongArch::XVSETEQZ_V;
6761 break;
6762 case LoongArch::PseudoXVBZ_B:
6763 CondOpc = LoongArch::XVSETANYEQZ_B;
6764 break;
6765 case LoongArch::PseudoXVBZ_H:
6766 CondOpc = LoongArch::XVSETANYEQZ_H;
6767 break;
6768 case LoongArch::PseudoXVBZ_W:
6769 CondOpc = LoongArch::XVSETANYEQZ_W;
6770 break;
6771 case LoongArch::PseudoXVBZ_D:
6772 CondOpc = LoongArch::XVSETANYEQZ_D;
6773 break;
6774 case LoongArch::PseudoXVBNZ:
6775 CondOpc = LoongArch::XVSETNEZ_V;
6776 break;
6777 case LoongArch::PseudoXVBNZ_B:
6778 CondOpc = LoongArch::XVSETALLNEZ_B;
6779 break;
6780 case LoongArch::PseudoXVBNZ_H:
6781 CondOpc = LoongArch::XVSETALLNEZ_H;
6782 break;
6783 case LoongArch::PseudoXVBNZ_W:
6784 CondOpc = LoongArch::XVSETALLNEZ_W;
6785 break;
6786 case LoongArch::PseudoXVBNZ_D:
6787 CondOpc = LoongArch::XVSETALLNEZ_D;
6788 break;
6789 }
6790
6791 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6792 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6793 DebugLoc DL = MI.getDebugLoc();
6796
6797 MachineFunction *F = BB->getParent();
6798 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6799 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6800 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6801
6802 F->insert(It, FalseBB);
6803 F->insert(It, TrueBB);
6804 F->insert(It, SinkBB);
6805
6806 // Transfer the remainder of MBB and its successor edges to Sink.
6807 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6809
6810 // Insert the real instruction into BB.
6811 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6812 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6813
6814 // Insert branch.
6815 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6816 BB->addSuccessor(FalseBB);
6817 BB->addSuccessor(TrueBB);
6818
6819 // FalseBB.
6820 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6821 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6822 .addReg(LoongArch::R0)
6823 .addImm(0);
6824 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6825 FalseBB->addSuccessor(SinkBB);
6826
6827 // TrueBB.
6828 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6829 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6830 .addReg(LoongArch::R0)
6831 .addImm(1);
6832 TrueBB->addSuccessor(SinkBB);
6833
6834 // SinkBB: merge the results.
6835 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6836 MI.getOperand(0).getReg())
6837 .addReg(RD1)
6838 .addMBB(FalseBB)
6839 .addReg(RD2)
6840 .addMBB(TrueBB);
6841
6842 // The pseudo instruction is gone now.
6843 MI.eraseFromParent();
6844 return SinkBB;
6845}
6846
6847static MachineBasicBlock *
6849 const LoongArchSubtarget &Subtarget) {
6850 unsigned InsOp;
6851 unsigned BroadcastOp;
6852 unsigned HalfSize;
6853 switch (MI.getOpcode()) {
6854 default:
6855 llvm_unreachable("Unexpected opcode");
6856 case LoongArch::PseudoXVINSGR2VR_B:
6857 HalfSize = 16;
6858 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6859 InsOp = LoongArch::XVEXTRINS_B;
6860 break;
6861 case LoongArch::PseudoXVINSGR2VR_H:
6862 HalfSize = 8;
6863 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6864 InsOp = LoongArch::XVEXTRINS_H;
6865 break;
6866 }
6867 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6868 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6869 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6870 DebugLoc DL = MI.getDebugLoc();
6872 // XDst = vector_insert XSrc, Elt, Idx
6873 Register XDst = MI.getOperand(0).getReg();
6874 Register XSrc = MI.getOperand(1).getReg();
6875 Register Elt = MI.getOperand(2).getReg();
6876 unsigned Idx = MI.getOperand(3).getImm();
6877
6878 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6879 Idx < HalfSize) {
6880 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6881 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6882
6883 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6884 .addReg(XSrc, 0, LoongArch::sub_128);
6885 BuildMI(*BB, MI, DL,
6886 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6887 : LoongArch::VINSGR2VR_B),
6888 ScratchSubReg2)
6889 .addReg(ScratchSubReg1)
6890 .addReg(Elt)
6891 .addImm(Idx);
6892
6893 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6894 .addImm(0)
6895 .addReg(ScratchSubReg2)
6896 .addImm(LoongArch::sub_128);
6897 } else {
6898 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6899 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6900
6901 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6902
6903 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6904 .addReg(ScratchReg1)
6905 .addReg(XSrc)
6906 .addImm(Idx >= HalfSize ? 48 : 18);
6907
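// XVEXTRINS inserts one element of the source into one element of the
// destination: the ui8 immediate encodes the destination index in its high
// nibble and the source index in its low nibble, so Idx * 17 (== Idx * 16 + Idx)
// selects the same lane index for both.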
6908 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6909 .addReg(XSrc)
6910 .addReg(ScratchReg2)
6911 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6912 }
6913
6914 MI.eraseFromParent();
6915 return BB;
6916}
6917
6920 const LoongArchSubtarget &Subtarget) {
6921 assert(Subtarget.hasExtLSX());
6922 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6923 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6924 DebugLoc DL = MI.getDebugLoc();
6926 Register Dst = MI.getOperand(0).getReg();
6927 Register Src = MI.getOperand(1).getReg();
6928 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6929 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6930 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6931
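// Scalar CTPOP is expanded through the vector unit: clear a vector register
// with vldi 0, insert the GPR operand into element 0, run vpcnt on it, and
// move the resulting count back to the destination GPR.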
6932 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6933 BuildMI(*BB, MI, DL,
6934 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6935 : LoongArch::VINSGR2VR_W),
6936 ScratchReg2)
6937 .addReg(ScratchReg1)
6938 .addReg(Src)
6939 .addImm(0);
6940 BuildMI(
6941 *BB, MI, DL,
6942 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6943 ScratchReg3)
6944 .addReg(ScratchReg2);
6945 BuildMI(*BB, MI, DL,
6946 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6947 : LoongArch::VPICKVE2GR_W),
6948 Dst)
6949 .addReg(ScratchReg3)
6950 .addImm(0);
6951
6952 MI.eraseFromParent();
6953 return BB;
6954}
6955
6956static MachineBasicBlock *
6958 const LoongArchSubtarget &Subtarget) {
6959 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6960 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6961 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6963 Register Dst = MI.getOperand(0).getReg();
6964 Register Src = MI.getOperand(1).getReg();
6965 DebugLoc DL = MI.getDebugLoc();
6966 unsigned EleBits = 8;
6967 unsigned NotOpc = 0;
6968 unsigned MskOpc;
6969
6970 switch (MI.getOpcode()) {
6971 default:
6972 llvm_unreachable("Unexpected opcode");
6973 case LoongArch::PseudoVMSKLTZ_B:
6974 MskOpc = LoongArch::VMSKLTZ_B;
6975 break;
6976 case LoongArch::PseudoVMSKLTZ_H:
6977 MskOpc = LoongArch::VMSKLTZ_H;
6978 EleBits = 16;
6979 break;
6980 case LoongArch::PseudoVMSKLTZ_W:
6981 MskOpc = LoongArch::VMSKLTZ_W;
6982 EleBits = 32;
6983 break;
6984 case LoongArch::PseudoVMSKLTZ_D:
6985 MskOpc = LoongArch::VMSKLTZ_D;
6986 EleBits = 64;
6987 break;
6988 case LoongArch::PseudoVMSKGEZ_B:
6989 MskOpc = LoongArch::VMSKGEZ_B;
6990 break;
6991 case LoongArch::PseudoVMSKEQZ_B:
6992 MskOpc = LoongArch::VMSKNZ_B;
6993 NotOpc = LoongArch::VNOR_V;
6994 break;
6995 case LoongArch::PseudoVMSKNEZ_B:
6996 MskOpc = LoongArch::VMSKNZ_B;
6997 break;
6998 case LoongArch::PseudoXVMSKLTZ_B:
6999 MskOpc = LoongArch::XVMSKLTZ_B;
7000 RC = &LoongArch::LASX256RegClass;
7001 break;
7002 case LoongArch::PseudoXVMSKLTZ_H:
7003 MskOpc = LoongArch::XVMSKLTZ_H;
7004 RC = &LoongArch::LASX256RegClass;
7005 EleBits = 16;
7006 break;
7007 case LoongArch::PseudoXVMSKLTZ_W:
7008 MskOpc = LoongArch::XVMSKLTZ_W;
7009 RC = &LoongArch::LASX256RegClass;
7010 EleBits = 32;
7011 break;
7012 case LoongArch::PseudoXVMSKLTZ_D:
7013 MskOpc = LoongArch::XVMSKLTZ_D;
7014 RC = &LoongArch::LASX256RegClass;
7015 EleBits = 64;
7016 break;
7017 case LoongArch::PseudoXVMSKGEZ_B:
7018 MskOpc = LoongArch::XVMSKGEZ_B;
7019 RC = &LoongArch::LASX256RegClass;
7020 break;
7021 case LoongArch::PseudoXVMSKEQZ_B:
7022 MskOpc = LoongArch::XVMSKNZ_B;
7023 NotOpc = LoongArch::XVNOR_V;
7024 RC = &LoongArch::LASX256RegClass;
7025 break;
7026 case LoongArch::PseudoXVMSKNEZ_B:
7027 MskOpc = LoongArch::XVMSKNZ_B;
7028 RC = &LoongArch::LASX256RegClass;
7029 break;
7030 }
7031
7032 Register Msk = MRI.createVirtualRegister(RC);
7033 if (NotOpc) {
7034 Register Tmp = MRI.createVirtualRegister(RC);
7035 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7036 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7037 .addReg(Tmp, RegState::Kill)
7038 .addReg(Tmp, RegState::Kill);
7039 } else {
7040 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7041 }
7042
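// Move the mask from the vector register into the destination GPR. A 256-bit
// LASX mask is produced as two 32-bit halves (one per 128-bit lane), so the
// halves are extracted with xvpickve2gr.wu and merged with bstrins; a 128-bit
// LSX mask fits in a single vpickve2gr.hu.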
7043 if (TRI->getRegSizeInBits(*RC) > 128) {
7044 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7045 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7046 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7047 .addReg(Msk)
7048 .addImm(0);
7049 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7050 .addReg(Msk, RegState::Kill)
7051 .addImm(4);
7052 BuildMI(*BB, MI, DL,
7053 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7054 : LoongArch::BSTRINS_W),
7055 Dst)
7058 .addImm(256 / EleBits - 1)
7059 .addImm(128 / EleBits);
7060 } else {
7061 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7062 .addReg(Msk, RegState::Kill)
7063 .addImm(0);
7064 }
7065
7066 MI.eraseFromParent();
7067 return BB;
7068}
7069
7070static MachineBasicBlock *
7072 const LoongArchSubtarget &Subtarget) {
7073 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7074 "Unexpected instruction");
7075
7076 MachineFunction &MF = *BB->getParent();
7077 DebugLoc DL = MI.getDebugLoc();
7079 Register LoReg = MI.getOperand(0).getReg();
7080 Register HiReg = MI.getOperand(1).getReg();
7081 Register SrcReg = MI.getOperand(2).getReg();
7082
7083 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7084 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7085 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7086 MI.eraseFromParent(); // The pseudo instruction is gone now.
7087 return BB;
7088}
7089
7090static MachineBasicBlock *
7092 const LoongArchSubtarget &Subtarget) {
7093 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7094 "Unexpected instruction");
7095
7096 MachineFunction &MF = *BB->getParent();
7097 DebugLoc DL = MI.getDebugLoc();
7100 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7101 Register DstReg = MI.getOperand(0).getReg();
7102 Register LoReg = MI.getOperand(1).getReg();
7103 Register HiReg = MI.getOperand(2).getReg();
7104
7105 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7106 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7107 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7108 .addReg(TmpReg, RegState::Kill)
7109 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7110 MI.eraseFromParent(); // The pseudo instruction is gone now.
7111 return BB;
7112}
7113
7115 switch (MI.getOpcode()) {
7116 default:
7117 return false;
7118 case LoongArch::Select_GPR_Using_CC_GPR:
7119 return true;
7120 }
7121}
7122
7123static MachineBasicBlock *
7125 const LoongArchSubtarget &Subtarget) {
7126 // To "insert" Select_* instructions, we actually have to insert the triangle
7127 // control-flow pattern. The incoming instructions know the destination vreg
7128 // to set, the condition code register to branch on, the true/false values to
7129 // select between, and the condcode to use to select the appropriate branch.
7130 //
7131 // We produce the following control flow:
7132 // HeadMBB
7133 // | \
7134 // | IfFalseMBB
7135 // | /
7136 // TailMBB
7137 //
7138 // When we find a sequence of selects we attempt to optimize their emission
7139 // by sharing the control flow. Currently we only handle cases where we have
7140 // multiple selects with the exact same condition (same LHS, RHS and CC).
7141 // The selects may be interleaved with other instructions if the other
7142 // instructions meet some requirements we deem safe:
7143 // - They are not pseudo instructions.
7144 // - They are debug instructions; otherwise,
7145 // - They do not have side-effects, do not access memory and their inputs do
7146 // not depend on the results of the select pseudo-instructions.
7147 // The TrueV/FalseV operands of the selects cannot depend on the result of
7148 // previous selects in the sequence.
7149 // These conditions could be further relaxed. See the X86 target for a
7150 // related approach and more information.
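// For example, two consecutive Select_GPR_Using_CC_GPR instructions testing
// the same LHS/RHS/CC are lowered to a single conditional branch followed by
// two PHI nodes in TailMBB.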
7151
7152 Register LHS = MI.getOperand(1).getReg();
7153 Register RHS;
7154 if (MI.getOperand(2).isReg())
7155 RHS = MI.getOperand(2).getReg();
7156 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7157
7158 SmallVector<MachineInstr *, 4> SelectDebugValues;
7159 SmallSet<Register, 4> SelectDests;
7160 SelectDests.insert(MI.getOperand(0).getReg());
7161
7162 MachineInstr *LastSelectPseudo = &MI;
7163 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7164 SequenceMBBI != E; ++SequenceMBBI) {
7165 if (SequenceMBBI->isDebugInstr())
7166 continue;
7167 if (isSelectPseudo(*SequenceMBBI)) {
7168 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7169 !SequenceMBBI->getOperand(2).isReg() ||
7170 SequenceMBBI->getOperand(2).getReg() != RHS ||
7171 SequenceMBBI->getOperand(3).getImm() != CC ||
7172 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7173 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7174 break;
7175 LastSelectPseudo = &*SequenceMBBI;
7176 SequenceMBBI->collectDebugValues(SelectDebugValues);
7177 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7178 continue;
7179 }
7180 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7181 SequenceMBBI->mayLoadOrStore() ||
7182 SequenceMBBI->usesCustomInsertionHook())
7183 break;
7184 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7185 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7186 }))
7187 break;
7188 }
7189
7190 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7191 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7192 DebugLoc DL = MI.getDebugLoc();
7194
7195 MachineBasicBlock *HeadMBB = BB;
7196 MachineFunction *F = BB->getParent();
7197 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7198 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7199
7200 F->insert(I, IfFalseMBB);
7201 F->insert(I, TailMBB);
7202
7203 // Set the call frame size on entry to the new basic blocks.
7204 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7205 IfFalseMBB->setCallFrameSize(CallFrameSize);
7206 TailMBB->setCallFrameSize(CallFrameSize);
7207
7208 // Transfer debug instructions associated with the selects to TailMBB.
7209 for (MachineInstr *DebugInstr : SelectDebugValues) {
7210 TailMBB->push_back(DebugInstr->removeFromParent());
7211 }
7212
7213 // Move all instructions after the sequence to TailMBB.
7214 TailMBB->splice(TailMBB->end(), HeadMBB,
7215 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7216 // Update machine-CFG edges by transferring all successors of the current
7217 // block to the new block which will contain the Phi nodes for the selects.
7218 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7219 // Set the successors for HeadMBB.
7220 HeadMBB->addSuccessor(IfFalseMBB);
7221 HeadMBB->addSuccessor(TailMBB);
7222
7223 // Insert appropriate branch.
7224 if (MI.getOperand(2).isImm())
7225 BuildMI(HeadMBB, DL, TII.get(CC))
7226 .addReg(LHS)
7227 .addImm(MI.getOperand(2).getImm())
7228 .addMBB(TailMBB);
7229 else
7230 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7231
7232 // IfFalseMBB just falls through to TailMBB.
7233 IfFalseMBB->addSuccessor(TailMBB);
7234
7235 // Create PHIs for all of the select pseudo-instructions.
7236 auto SelectMBBI = MI.getIterator();
7237 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7238 auto InsertionPoint = TailMBB->begin();
7239 while (SelectMBBI != SelectEnd) {
7240 auto Next = std::next(SelectMBBI);
7241 if (isSelectPseudo(*SelectMBBI)) {
7242 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7243 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7244 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7245 .addReg(SelectMBBI->getOperand(4).getReg())
7246 .addMBB(HeadMBB)
7247 .addReg(SelectMBBI->getOperand(5).getReg())
7248 .addMBB(IfFalseMBB);
7249 SelectMBBI->eraseFromParent();
7250 }
7251 SelectMBBI = Next;
7252 }
7253
7254 F->getProperties().resetNoPHIs();
7255 return TailMBB;
7256}
7257
7258MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7259 MachineInstr &MI, MachineBasicBlock *BB) const {
7260 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7261 DebugLoc DL = MI.getDebugLoc();
7262
7263 switch (MI.getOpcode()) {
7264 default:
7265 llvm_unreachable("Unexpected instr type to insert");
7266 case LoongArch::DIV_W:
7267 case LoongArch::DIV_WU:
7268 case LoongArch::MOD_W:
7269 case LoongArch::MOD_WU:
7270 case LoongArch::DIV_D:
7271 case LoongArch::DIV_DU:
7272 case LoongArch::MOD_D:
7273 case LoongArch::MOD_DU:
7274 return insertDivByZeroTrap(MI, BB);
7275 break;
7276 case LoongArch::WRFCSR: {
7277 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7278 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7279 .addReg(MI.getOperand(1).getReg());
7280 MI.eraseFromParent();
7281 return BB;
7282 }
7283 case LoongArch::RDFCSR: {
7284 MachineInstr *ReadFCSR =
7285 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7286 MI.getOperand(0).getReg())
7287 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7288 ReadFCSR->getOperand(1).setIsUndef();
7289 MI.eraseFromParent();
7290 return BB;
7291 }
7292 case LoongArch::Select_GPR_Using_CC_GPR:
7293 return emitSelectPseudo(MI, BB, Subtarget);
7294 case LoongArch::BuildPairF64Pseudo:
7295 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7296 case LoongArch::SplitPairF64Pseudo:
7297 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7298 case LoongArch::PseudoVBZ:
7299 case LoongArch::PseudoVBZ_B:
7300 case LoongArch::PseudoVBZ_H:
7301 case LoongArch::PseudoVBZ_W:
7302 case LoongArch::PseudoVBZ_D:
7303 case LoongArch::PseudoVBNZ:
7304 case LoongArch::PseudoVBNZ_B:
7305 case LoongArch::PseudoVBNZ_H:
7306 case LoongArch::PseudoVBNZ_W:
7307 case LoongArch::PseudoVBNZ_D:
7308 case LoongArch::PseudoXVBZ:
7309 case LoongArch::PseudoXVBZ_B:
7310 case LoongArch::PseudoXVBZ_H:
7311 case LoongArch::PseudoXVBZ_W:
7312 case LoongArch::PseudoXVBZ_D:
7313 case LoongArch::PseudoXVBNZ:
7314 case LoongArch::PseudoXVBNZ_B:
7315 case LoongArch::PseudoXVBNZ_H:
7316 case LoongArch::PseudoXVBNZ_W:
7317 case LoongArch::PseudoXVBNZ_D:
7318 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7319 case LoongArch::PseudoXVINSGR2VR_B:
7320 case LoongArch::PseudoXVINSGR2VR_H:
7321 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7322 case LoongArch::PseudoCTPOP:
7323 return emitPseudoCTPOP(MI, BB, Subtarget);
7324 case LoongArch::PseudoVMSKLTZ_B:
7325 case LoongArch::PseudoVMSKLTZ_H:
7326 case LoongArch::PseudoVMSKLTZ_W:
7327 case LoongArch::PseudoVMSKLTZ_D:
7328 case LoongArch::PseudoVMSKGEZ_B:
7329 case LoongArch::PseudoVMSKEQZ_B:
7330 case LoongArch::PseudoVMSKNEZ_B:
7331 case LoongArch::PseudoXVMSKLTZ_B:
7332 case LoongArch::PseudoXVMSKLTZ_H:
7333 case LoongArch::PseudoXVMSKLTZ_W:
7334 case LoongArch::PseudoXVMSKLTZ_D:
7335 case LoongArch::PseudoXVMSKGEZ_B:
7336 case LoongArch::PseudoXVMSKEQZ_B:
7337 case LoongArch::PseudoXVMSKNEZ_B:
7338 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7339 case TargetOpcode::STATEPOINT:
7340 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7341 // while the bl call instruction (which the statepoint is lowered to at the
7342 // end) has an implicit def. This def is early-clobber as it will be set at
7343 // the moment of the call and earlier than any use is read.
7344 // Add this implicit dead def here as a workaround.
7345 MI.addOperand(*MI.getMF(),
7347 LoongArch::R1, /*isDef*/ true,
7348 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7349 /*isUndef*/ false, /*isEarlyClobber*/ true));
7350 if (!Subtarget.is64Bit())
7351 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7352 return emitPatchPoint(MI, BB);
7353 }
7354}
7355
7357 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7358 unsigned *Fast) const {
7359 if (!Subtarget.hasUAL())
7360 return false;
7361
7362 // TODO: set reasonable speed number.
7363 if (Fast)
7364 *Fast = 1;
7365 return true;
7366}
7367
7368const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7369 switch ((LoongArchISD::NodeType)Opcode) {
7371 break;
7372
7373#define NODE_NAME_CASE(node) \
7374 case LoongArchISD::node: \
7375 return "LoongArchISD::" #node;
7376
7377 // TODO: Add more target-dependent nodes later.
7378 NODE_NAME_CASE(CALL)
7379 NODE_NAME_CASE(CALL_MEDIUM)
7380 NODE_NAME_CASE(CALL_LARGE)
7381 NODE_NAME_CASE(RET)
7382 NODE_NAME_CASE(TAIL)
7383 NODE_NAME_CASE(TAIL_MEDIUM)
7384 NODE_NAME_CASE(TAIL_LARGE)
7385 NODE_NAME_CASE(SELECT_CC)
7386 NODE_NAME_CASE(BR_CC)
7387 NODE_NAME_CASE(BRCOND)
7388 NODE_NAME_CASE(SLL_W)
7389 NODE_NAME_CASE(SRA_W)
7390 NODE_NAME_CASE(SRL_W)
7391 NODE_NAME_CASE(BSTRINS)
7392 NODE_NAME_CASE(BSTRPICK)
7393 NODE_NAME_CASE(MOVGR2FR_W)
7394 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7395 NODE_NAME_CASE(MOVGR2FR_D)
7396 NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
7397 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7398 NODE_NAME_CASE(FTINT)
7399 NODE_NAME_CASE(BUILD_PAIR_F64)
7400 NODE_NAME_CASE(SPLIT_PAIR_F64)
7401 NODE_NAME_CASE(REVB_2H)
7402 NODE_NAME_CASE(REVB_2W)
7403 NODE_NAME_CASE(BITREV_4B)
7404 NODE_NAME_CASE(BITREV_8B)
7405 NODE_NAME_CASE(BITREV_W)
7406 NODE_NAME_CASE(ROTR_W)
7407 NODE_NAME_CASE(ROTL_W)
7408 NODE_NAME_CASE(DIV_W)
7409 NODE_NAME_CASE(DIV_WU)
7410 NODE_NAME_CASE(MOD_W)
7411 NODE_NAME_CASE(MOD_WU)
7412 NODE_NAME_CASE(CLZ_W)
7413 NODE_NAME_CASE(CTZ_W)
7414 NODE_NAME_CASE(DBAR)
7415 NODE_NAME_CASE(IBAR)
7416 NODE_NAME_CASE(BREAK)
7417 NODE_NAME_CASE(SYSCALL)
7418 NODE_NAME_CASE(CRC_W_B_W)
7419 NODE_NAME_CASE(CRC_W_H_W)
7420 NODE_NAME_CASE(CRC_W_W_W)
7421 NODE_NAME_CASE(CRC_W_D_W)
7422 NODE_NAME_CASE(CRCC_W_B_W)
7423 NODE_NAME_CASE(CRCC_W_H_W)
7424 NODE_NAME_CASE(CRCC_W_W_W)
7425 NODE_NAME_CASE(CRCC_W_D_W)
7426 NODE_NAME_CASE(CSRRD)
7427 NODE_NAME_CASE(CSRWR)
7428 NODE_NAME_CASE(CSRXCHG)
7429 NODE_NAME_CASE(IOCSRRD_B)
7430 NODE_NAME_CASE(IOCSRRD_H)
7431 NODE_NAME_CASE(IOCSRRD_W)
7432 NODE_NAME_CASE(IOCSRRD_D)
7433 NODE_NAME_CASE(IOCSRWR_B)
7434 NODE_NAME_CASE(IOCSRWR_H)
7435 NODE_NAME_CASE(IOCSRWR_W)
7436 NODE_NAME_CASE(IOCSRWR_D)
7437 NODE_NAME_CASE(CPUCFG)
7438 NODE_NAME_CASE(MOVGR2FCSR)
7439 NODE_NAME_CASE(MOVFCSR2GR)
7440 NODE_NAME_CASE(CACOP_D)
7441 NODE_NAME_CASE(CACOP_W)
7442 NODE_NAME_CASE(VSHUF)
7443 NODE_NAME_CASE(VPICKEV)
7444 NODE_NAME_CASE(VPICKOD)
7445 NODE_NAME_CASE(VPACKEV)
7446 NODE_NAME_CASE(VPACKOD)
7447 NODE_NAME_CASE(VILVL)
7448 NODE_NAME_CASE(VILVH)
7449 NODE_NAME_CASE(VSHUF4I)
7450 NODE_NAME_CASE(VREPLVEI)
7451 NODE_NAME_CASE(VREPLGR2VR)
7452 NODE_NAME_CASE(XVPERMI)
7453 NODE_NAME_CASE(XVPERM)
7454 NODE_NAME_CASE(XVREPLVE0)
7455 NODE_NAME_CASE(XVREPLVE0Q)
7456 NODE_NAME_CASE(VPICK_SEXT_ELT)
7457 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7458 NODE_NAME_CASE(VREPLVE)
7459 NODE_NAME_CASE(VALL_ZERO)
7460 NODE_NAME_CASE(VANY_ZERO)
7461 NODE_NAME_CASE(VALL_NONZERO)
7462 NODE_NAME_CASE(VANY_NONZERO)
7463 NODE_NAME_CASE(FRECIPE)
7464 NODE_NAME_CASE(FRSQRTE)
7465 NODE_NAME_CASE(VSLLI)
7466 NODE_NAME_CASE(VSRLI)
7467 NODE_NAME_CASE(VBSLL)
7468 NODE_NAME_CASE(VBSRL)
7469 NODE_NAME_CASE(VLDREPL)
7470 NODE_NAME_CASE(VMSKLTZ)
7471 NODE_NAME_CASE(VMSKGEZ)
7472 NODE_NAME_CASE(VMSKEQZ)
7473 NODE_NAME_CASE(VMSKNEZ)
7474 NODE_NAME_CASE(XVMSKLTZ)
7475 NODE_NAME_CASE(XVMSKGEZ)
7476 NODE_NAME_CASE(XVMSKEQZ)
7477 NODE_NAME_CASE(XVMSKNEZ)
7478 NODE_NAME_CASE(VHADDW)
7479 }
7480#undef NODE_NAME_CASE
7481 return nullptr;
7482}
7483
7484//===----------------------------------------------------------------------===//
7485// Calling Convention Implementation
7486//===----------------------------------------------------------------------===//
7487
7488 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
7489// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7490// fixed-point arguments, and floating-point arguments when no FPR is available
7491// or with soft float ABI.
7492const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7493 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7494 LoongArch::R10, LoongArch::R11};
7495 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7496// arguments, and fa0-fa1 are also used to return values.
7497const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7498 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7499 LoongArch::F6, LoongArch::F7};
7500// FPR32 and FPR64 alias each other.
7502 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7503 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7504
7505const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7506 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7507 LoongArch::VR6, LoongArch::VR7};
7508
7509const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7510 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7511 LoongArch::XR6, LoongArch::XR7};
7512
7513// Pass a 2*GRLen argument that has been split into two GRLen values through
7514// registers or the stack as necessary.
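// For example, on LA32 an i64 argument is split into two i32 halves: if two
// GPRs are free both halves are passed in registers, if only one is free the
// first half uses it and the second half goes on the stack, and if none are
// free both halves are passed on the stack.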
7515static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7516 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7517 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7518 ISD::ArgFlagsTy ArgFlags2) {
7519 unsigned GRLenInBytes = GRLen / 8;
7520 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7521 // At least one half can be passed via register.
7522 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7523 VA1.getLocVT(), CCValAssign::Full));
7524 } else {
7525 // Both halves must be passed on the stack, with proper alignment.
7526 Align StackAlign =
7527 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7528 State.addLoc(
7530 State.AllocateStack(GRLenInBytes, StackAlign),
7531 VA1.getLocVT(), CCValAssign::Full));
7532 State.addLoc(CCValAssign::getMem(
7533 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7534 LocVT2, CCValAssign::Full));
7535 return false;
7536 }
7537 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7538 // The second half can also be passed via register.
7539 State.addLoc(
7540 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7541 } else {
7542 // The second half is passed via the stack, without additional alignment.
7543 State.addLoc(CCValAssign::getMem(
7544 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7545 LocVT2, CCValAssign::Full));
7546 }
7547 return false;
7548}
7549
7550// Implements the LoongArch calling convention. Returns true upon failure.
7552 unsigned ValNo, MVT ValVT,
7553 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7554 CCState &State, bool IsRet, Type *OrigTy) {
7555 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7556 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7557 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7558 MVT LocVT = ValVT;
7559
7560 // Any return value split into more than two values can't be returned
7561 // directly.
7562 if (IsRet && ValNo > 1)
7563 return true;
7564
7565 // Floating-point values are passed in GPRs with the soft-float ABI, when
7565 // passing a variadic argument, or when no FPR is available.
7566 bool UseGPRForFloat = true;
7567
7568 switch (ABI) {
7569 default:
7570 llvm_unreachable("Unexpected ABI");
7571 break;
7576 UseGPRForFloat = ArgFlags.isVarArg();
7577 break;
7580 break;
7581 }
7582
7583 // If this is a variadic argument, the LoongArch calling convention requires
7584 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7585 // byte alignment. An aligned register should be used regardless of whether
7586 // the original argument was split during legalisation or not. The argument
7587 // will not be passed by registers if the original type is larger than
7588 // 2*GRLen, so the register alignment rule does not apply.
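// For example, a variadic double on LA32 (size and alignment of 2*GRLen) must
// start in an even-numbered argument register: if the next free register is
// a1, a3, a5 or a7, that register is skipped below.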
7589 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7590 if (ArgFlags.isVarArg() &&
7591 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7592 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7593 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7594 // Skip 'odd' register if necessary.
7595 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7596 State.AllocateReg(ArgGPRs);
7597 }
7598
7599 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7600 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7601 State.getPendingArgFlags();
7602
7603 assert(PendingLocs.size() == PendingArgFlags.size() &&
7604 "PendingLocs and PendingArgFlags out of sync");
7605
7606 // FPR32 and FPR64 alias each other.
7607 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7608 UseGPRForFloat = true;
7609
7610 if (UseGPRForFloat && ValVT == MVT::f32) {
7611 LocVT = GRLenVT;
7612 LocInfo = CCValAssign::BCvt;
7613 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7614 LocVT = MVT::i64;
7615 LocInfo = CCValAssign::BCvt;
7616 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7617 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7618 // registers are exhausted.
7619 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7620 // Depending on available argument GPRS, f64 may be passed in a pair of
7621 // GPRs, split between a GPR and the stack, or passed completely on the
7622 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7623 // cases.
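// For example, with exactly one argument GPR left, the low 32 bits of the f64
// are passed in that register and the high 32 bits in the next 4-byte stack
// slot; unpackF64OnLA32DSoftABI reassembles the pair on the callee side.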
7624 MCRegister Reg = State.AllocateReg(ArgGPRs);
7625 if (!Reg) {
7626 int64_t StackOffset = State.AllocateStack(8, Align(8));
7627 State.addLoc(
7628 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7629 return false;
7630 }
7631 LocVT = MVT::i32;
7632 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7633 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7634 if (HiReg) {
7635 State.addLoc(
7636 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7637 } else {
7638 int64_t StackOffset = State.AllocateStack(4, Align(4));
7639 State.addLoc(
7640 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7641 }
7642 return false;
7643 }
7644
7645 // Split arguments might be passed indirectly, so keep track of the pending
7646 // values.
7647 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7648 LocVT = GRLenVT;
7649 LocInfo = CCValAssign::Indirect;
7650 PendingLocs.push_back(
7651 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7652 PendingArgFlags.push_back(ArgFlags);
7653 if (!ArgFlags.isSplitEnd()) {
7654 return false;
7655 }
7656 }
7657
7658 // If the split argument only had two elements, it should be passed directly
7659 // in registers or on the stack.
7660 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7661 PendingLocs.size() <= 2) {
7662 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7663 // Apply the normal calling convention rules to the first half of the
7664 // split argument.
7665 CCValAssign VA = PendingLocs[0];
7666 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7667 PendingLocs.clear();
7668 PendingArgFlags.clear();
7669 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7670 ArgFlags);
7671 }
7672
7673 // Allocate to a register if possible, or else a stack slot.
7674 Register Reg;
7675 unsigned StoreSizeBytes = GRLen / 8;
7676 Align StackAlign = Align(GRLen / 8);
7677
7678 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7679 Reg = State.AllocateReg(ArgFPR32s);
7680 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7681 Reg = State.AllocateReg(ArgFPR64s);
7682 } else if (ValVT.is128BitVector()) {
7683 Reg = State.AllocateReg(ArgVRs);
7684 UseGPRForFloat = false;
7685 StoreSizeBytes = 16;
7686 StackAlign = Align(16);
7687 } else if (ValVT.is256BitVector()) {
7688 Reg = State.AllocateReg(ArgXRs);
7689 UseGPRForFloat = false;
7690 StoreSizeBytes = 32;
7691 StackAlign = Align(32);
7692 } else {
7693 Reg = State.AllocateReg(ArgGPRs);
7694 }
7695
7696 unsigned StackOffset =
7697 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7698
7699 // If we reach this point and PendingLocs is non-empty, we must be at the
7700 // end of a split argument that must be passed indirectly.
7701 if (!PendingLocs.empty()) {
7702 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7703 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7704 for (auto &It : PendingLocs) {
7705 if (Reg)
7706 It.convertToReg(Reg);
7707 else
7708 It.convertToMem(StackOffset);
7709 State.addLoc(It);
7710 }
7711 PendingLocs.clear();
7712 PendingArgFlags.clear();
7713 return false;
7714 }
7715 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7716 "Expected an GRLenVT at this stage");
7717
7718 if (Reg) {
7719 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7720 return false;
7721 }
7722
7723 // When a floating-point value is passed on the stack, no bit-cast is needed.
7724 if (ValVT.isFloatingPoint()) {
7725 LocVT = ValVT;
7726 LocInfo = CCValAssign::Full;
7727 }
7728
7729 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7730 return false;
7731}
7732
7733void LoongArchTargetLowering::analyzeInputArgs(
7734 MachineFunction &MF, CCState &CCInfo,
7735 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7736 LoongArchCCAssignFn Fn) const {
7737 FunctionType *FType = MF.getFunction().getFunctionType();
7738 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7739 MVT ArgVT = Ins[i].VT;
7740 Type *ArgTy = nullptr;
7741 if (IsRet)
7742 ArgTy = FType->getReturnType();
7743 else if (Ins[i].isOrigArg())
7744 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7746 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7747 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7748 CCInfo, IsRet, ArgTy)) {
7749 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7750 << '\n');
7751 llvm_unreachable("");
7752 }
7753 }
7754}
7755
7756void LoongArchTargetLowering::analyzeOutputArgs(
7757 MachineFunction &MF, CCState &CCInfo,
7758 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7759 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7760 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7761 MVT ArgVT = Outs[i].VT;
7762 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7764 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7765 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7766 CCInfo, IsRet, OrigTy)) {
7767 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7768 << "\n");
7769 llvm_unreachable("");
7770 }
7771 }
7772}
7773
7774// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7775// values.
7777 const CCValAssign &VA, const SDLoc &DL) {
7778 switch (VA.getLocInfo()) {
7779 default:
7780 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7781 case CCValAssign::Full:
7783 break;
7784 case CCValAssign::BCvt:
7785 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7786 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7787 else
7788 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7789 break;
7790 }
7791 return Val;
7792}
7793
7795 const CCValAssign &VA, const SDLoc &DL,
7796 const ISD::InputArg &In,
7797 const LoongArchTargetLowering &TLI) {
7800 EVT LocVT = VA.getLocVT();
7801 SDValue Val;
7802 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7803 Register VReg = RegInfo.createVirtualRegister(RC);
7804 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7805 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7806
7807 // If input is sign extended from 32 bits, note it for the OptW pass.
7808 if (In.isOrigArg()) {
7809 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7810 if (OrigArg->getType()->isIntegerTy()) {
7811 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7812 // An input zero extended from i31 can also be considered sign extended, since its bit 31 is known to be zero.
7813 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7814 (BitWidth < 32 && In.Flags.isZExt())) {
7817 LAFI->addSExt32Register(VReg);
7818 }
7819 }
7820 }
7821
7822 return convertLocVTToValVT(DAG, Val, VA, DL);
7823}
7824
7825// The caller is responsible for loading the full value if the argument is
7826// passed with CCValAssign::Indirect.
7828 const CCValAssign &VA, const SDLoc &DL) {
7830 MachineFrameInfo &MFI = MF.getFrameInfo();
7831 EVT ValVT = VA.getValVT();
7832 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7833 /*IsImmutable=*/true);
7834 SDValue FIN = DAG.getFrameIndex(
7836
7837 ISD::LoadExtType ExtType;
7838 switch (VA.getLocInfo()) {
7839 default:
7840 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7841 case CCValAssign::Full:
7843 case CCValAssign::BCvt:
7844 ExtType = ISD::NON_EXTLOAD;
7845 break;
7846 }
7847 return DAG.getExtLoad(
7848 ExtType, DL, VA.getLocVT(), Chain, FIN,
7850}
7851
7853 const CCValAssign &VA,
7854 const CCValAssign &HiVA,
7855 const SDLoc &DL) {
7856 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7857 "Unexpected VA");
7859 MachineFrameInfo &MFI = MF.getFrameInfo();
7861
7862 assert(VA.isRegLoc() && "Expected register VA assignment");
7863
7864 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7865 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7866 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7867 SDValue Hi;
7868 if (HiVA.isMemLoc()) {
7869 // Second half of f64 is passed on the stack.
7870 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7871 /*IsImmutable=*/true);
7872 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7873 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7875 } else {
7876 // Second half of f64 is passed in another GPR.
7877 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7878 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7879 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7880 }
7881 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7882}
7883
7885 const CCValAssign &VA, const SDLoc &DL) {
7886 EVT LocVT = VA.getLocVT();
7887
7888 switch (VA.getLocInfo()) {
7889 default:
7890 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7891 case CCValAssign::Full:
7892 break;
7893 case CCValAssign::BCvt:
7894 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7895 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7896 else
7897 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7898 break;
7899 }
7900 return Val;
7901}
7902
7903static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7904 CCValAssign::LocInfo LocInfo,
7905 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7906 CCState &State) {
7907 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7908 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7909 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7910 static const MCPhysReg GPRList[] = {
7911 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7912 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7913 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7914 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7915 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7916 return false;
7917 }
7918 }
7919
7920 if (LocVT == MVT::f32) {
7921 // Pass in STG registers: F1, F2, F3, F4
7922 // fs0,fs1,fs2,fs3
7923 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7924 LoongArch::F26, LoongArch::F27};
7925 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7926 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7927 return false;
7928 }
7929 }
7930
7931 if (LocVT == MVT::f64) {
7932 // Pass in STG registers: D1, D2, D3, D4
7933 // fs4,fs5,fs6,fs7
7934 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7935 LoongArch::F30_64, LoongArch::F31_64};
7936 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7937 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7938 return false;
7939 }
7940 }
7941
7942 report_fatal_error("No registers left in GHC calling convention");
7943 return true;
7944}
7945
7946// Transform physical registers into virtual registers.
7948 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7949 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7950 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7951
7953
7954 switch (CallConv) {
7955 default:
7956 llvm_unreachable("Unsupported calling convention");
7957 case CallingConv::C:
7958 case CallingConv::Fast:
7960 break;
7961 case CallingConv::GHC:
7962 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7963 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7965 "GHC calling convention requires the F and D extensions");
7966 }
7967
7968 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7969 MVT GRLenVT = Subtarget.getGRLenVT();
7970 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7971 // Used with varargs to accumulate store chains.
7972 std::vector<SDValue> OutChains;
7973
7974 // Assign locations to all of the incoming arguments.
7976 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7977
7978 if (CallConv == CallingConv::GHC)
7980 else
7981 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7982
7983 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7984 CCValAssign &VA = ArgLocs[i];
7985 SDValue ArgValue;
7986 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7987 // case.
7988 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7989 assert(VA.needsCustom());
7990 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7991 } else if (VA.isRegLoc())
7992 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7993 else
7994 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7995 if (VA.getLocInfo() == CCValAssign::Indirect) {
7996 // If the original argument was split and passed by reference, we need to
7997 // load all parts of it here (using the same address).
7998 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8000 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8001 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8002 assert(ArgPartOffset == 0);
8003 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8004 CCValAssign &PartVA = ArgLocs[i + 1];
8005 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8006 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8007 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8008 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8010 ++i;
8011 ++InsIdx;
8012 }
8013 continue;
8014 }
8015 InVals.push_back(ArgValue);
8016 }
8017
8018 if (IsVarArg) {
8020 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8021 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8022 MachineFrameInfo &MFI = MF.getFrameInfo();
8023 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8024 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8025
8026 // Offset of the first variable argument from stack pointer, and size of
8027 // the vararg save area. For now, the varargs save area is either zero or
8028 // large enough to hold a0-a7.
8029 int VaArgOffset, VarArgsSaveSize;
8030
8031 // If all registers are allocated, then all varargs must be passed on the
8032 // stack and we don't need to save any argregs.
8033 if (ArgRegs.size() == Idx) {
8034 VaArgOffset = CCInfo.getStackSize();
8035 VarArgsSaveSize = 0;
8036 } else {
8037 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8038 VaArgOffset = -VarArgsSaveSize;
8039 }
8040
8041 // Record the frame index of the first variable argument
8042 // which is a value necessary to VASTART.
8043 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8044 LoongArchFI->setVarArgsFrameIndex(FI);
8045
8046 // If saving an odd number of registers then create an extra stack slot to
8047 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8048 // offsets to even-numbered registers remain 2*GRLen-aligned.
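// For example, if a0-a4 hold named arguments (Idx == 5), then a5-a7 are saved
// (3 * GRLen bytes) and one extra padding slot is added so the save area stays
// 2*GRLen aligned.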
8049 if (Idx % 2) {
8050 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8051 true);
8052 VarArgsSaveSize += GRLenInBytes;
8053 }
8054
8055 // Copy the integer registers that may have been used for passing varargs
8056 // to the vararg save area.
8057 for (unsigned I = Idx; I < ArgRegs.size();
8058 ++I, VaArgOffset += GRLenInBytes) {
8059 const Register Reg = RegInfo.createVirtualRegister(RC);
8060 RegInfo.addLiveIn(ArgRegs[I], Reg);
8061 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8062 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8063 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8064 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8066 cast<StoreSDNode>(Store.getNode())
8067 ->getMemOperand()
8068 ->setValue((Value *)nullptr);
8069 OutChains.push_back(Store);
8070 }
8071 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8072 }
8073
8074 // All stores are grouped in one node to allow the matching between
8075 // the size of Ins and InVals. This only happens for vararg functions.
8076 if (!OutChains.empty()) {
8077 OutChains.push_back(Chain);
8078 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8079 }
8080
8081 return Chain;
8082}
8083
8085 return CI->isTailCall();
8086}
8087
8088 // Check that the return value is used only as a return value, as otherwise
8089 // we can't perform a tail call.
8091 SDValue &Chain) const {
8092 if (N->getNumValues() != 1)
8093 return false;
8094 if (!N->hasNUsesOfValue(1, 0))
8095 return false;
8096
8097 SDNode *Copy = *N->user_begin();
8098 if (Copy->getOpcode() != ISD::CopyToReg)
8099 return false;
8100
8101 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8102 // isn't safe to perform a tail call.
8103 if (Copy->getGluedNode())
8104 return false;
8105
8106 // The copy must be used by a LoongArchISD::RET, and nothing else.
8107 bool HasRet = false;
8108 for (SDNode *Node : Copy->users()) {
8109 if (Node->getOpcode() != LoongArchISD::RET)
8110 return false;
8111 HasRet = true;
8112 }
8113
8114 if (!HasRet)
8115 return false;
8116
8117 Chain = Copy->getOperand(0);
8118 return true;
8119}
8120
8121// Check whether the call is eligible for tail call optimization.
8122bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8123 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8124 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8125
8126 auto CalleeCC = CLI.CallConv;
8127 auto &Outs = CLI.Outs;
8128 auto &Caller = MF.getFunction();
8129 auto CallerCC = Caller.getCallingConv();
8130
8131 // Do not tail call opt if the stack is used to pass parameters.
8132 if (CCInfo.getStackSize() != 0)
8133 return false;
8134
8135 // Do not tail call opt if any parameters need to be passed indirectly.
8136 for (auto &VA : ArgLocs)
8137 if (VA.getLocInfo() == CCValAssign::Indirect)
8138 return false;
8139
8140 // Do not tail call opt if either caller or callee uses struct return
8141 // semantics.
8142 auto IsCallerStructRet = Caller.hasStructRetAttr();
8143 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8144 if (IsCallerStructRet || IsCalleeStructRet)
8145 return false;
8146
8147 // Do not tail call opt if either the callee or caller has a byval argument.
8148 for (auto &Arg : Outs)
8149 if (Arg.Flags.isByVal())
8150 return false;
8151
8152 // The callee has to preserve all registers the caller needs to preserve.
8153 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8154 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8155 if (CalleeCC != CallerCC) {
8156 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8157 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8158 return false;
8159 }
8160 return true;
8161}
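// Illustrative example (an assumption for exposition, not from this file): a
// call such as
//   %r = tail call i32 @callee(i32 %a)
//   ret i32 %r
// passes the checks above when %a is assigned to a register and no sret, byval
// or indirect arguments are involved, whereas any call that needs stack space
// for its arguments is rejected by the getStackSize() check.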
8162
8164 return DAG.getDataLayout().getPrefTypeAlign(
8165 VT.getTypeForEVT(*DAG.getContext()));
8166}
8167
8168// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8169// and output parameter nodes.
8170SDValue
8172 SmallVectorImpl<SDValue> &InVals) const {
8173 SelectionDAG &DAG = CLI.DAG;
8174 SDLoc &DL = CLI.DL;
8176 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8178 SDValue Chain = CLI.Chain;
8179 SDValue Callee = CLI.Callee;
8180 CallingConv::ID CallConv = CLI.CallConv;
8181 bool IsVarArg = CLI.IsVarArg;
8182 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8183 MVT GRLenVT = Subtarget.getGRLenVT();
8184 bool &IsTailCall = CLI.IsTailCall;
8185
8187
8188 // Analyze the operands of the call, assigning locations to each operand.
8190 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8191
8192 if (CallConv == CallingConv::GHC)
8193 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8194 else
8195 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8196
8197 // Check if it's really possible to do a tail call.
8198 if (IsTailCall)
8199 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8200
8201 if (IsTailCall)
8202 ++NumTailCalls;
8203 else if (CLI.CB && CLI.CB->isMustTailCall())
8204 report_fatal_error("failed to perform tail call elimination on a call "
8205 "site marked musttail");
8206
8207 // Get a count of how many bytes are to be pushed on the stack.
8208 unsigned NumBytes = ArgCCInfo.getStackSize();
8209
8210 // Create local copies for byval args.
8211 SmallVector<SDValue> ByValArgs;
8212 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8213 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8214 if (!Flags.isByVal())
8215 continue;
8216
8217 SDValue Arg = OutVals[i];
8218 unsigned Size = Flags.getByValSize();
8219 Align Alignment = Flags.getNonZeroByValAlign();
8220
8221 int FI =
8222 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8223 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8224 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8225
8226 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8227 /*IsVolatile=*/false,
8228 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8230 ByValArgs.push_back(FIPtr);
8231 }
8232
8233 if (!IsTailCall)
8234 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8235
8236 // Copy argument values to their designated locations.
8238 SmallVector<SDValue> MemOpChains;
8239 SDValue StackPtr;
8240 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8241 ++i, ++OutIdx) {
8242 CCValAssign &VA = ArgLocs[i];
8243 SDValue ArgValue = OutVals[OutIdx];
8244 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8245
8246 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8247 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8248 assert(VA.isRegLoc() && "Expected register VA assignment");
8249 assert(VA.needsCustom());
8250 SDValue SplitF64 =
8252 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8253 SDValue Lo = SplitF64.getValue(0);
8254 SDValue Hi = SplitF64.getValue(1);
8255
8256 Register RegLo = VA.getLocReg();
8257 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8258
8259 // Get the CCValAssign for the Hi part.
8260 CCValAssign &HiVA = ArgLocs[++i];
8261
8262 if (HiVA.isMemLoc()) {
8263 // Second half of f64 is passed on the stack.
8264 if (!StackPtr.getNode())
8265 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8267 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8268 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8269 // Emit the store.
8270 MemOpChains.push_back(DAG.getStore(
8271 Chain, DL, Hi, Address,
8273 } else {
8274 // Second half of f64 is passed in another GPR.
8275 Register RegHigh = HiVA.getLocReg();
8276 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8277 }
8278 continue;
8279 }
8280
8281 // Promote the value if needed.
8282 // For now, only handle fully promoted and indirect arguments.
8283 if (VA.getLocInfo() == CCValAssign::Indirect) {
8284 // Store the argument in a stack slot and pass its address.
8285 Align StackAlign =
8286 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8287 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8288 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8289 // If the original argument was split and passed by reference, we need to
8290 // store the required parts of it here (and pass just one address).
8291 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8292 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8293 assert(ArgPartOffset == 0);
8294 // Calculate the total size to store. We cannot know what we are actually
8295 // storing without walking all of the parts in the loop below and collecting
8296 // the info.
8298 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8299 SDValue PartValue = OutVals[OutIdx + 1];
8300 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8301 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8302 EVT PartVT = PartValue.getValueType();
8303
8304 StoredSize += PartVT.getStoreSize();
8305 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8306 Parts.push_back(std::make_pair(PartValue, Offset));
8307 ++i;
8308 ++OutIdx;
8309 }
8310 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8311 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8312 MemOpChains.push_back(
8313 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8315 for (const auto &Part : Parts) {
8316 SDValue PartValue = Part.first;
8317 SDValue PartOffset = Part.second;
8319 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8320 MemOpChains.push_back(
8321 DAG.getStore(Chain, DL, PartValue, Address,
8323 }
8324 ArgValue = SpillSlot;
8325 } else {
8326 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8327 }
8328
8329 // Use local copy if it is a byval arg.
8330 if (Flags.isByVal())
8331 ArgValue = ByValArgs[j++];
8332
8333 if (VA.isRegLoc()) {
8334 // Queue up the argument copies and emit them at the end.
8335 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8336 } else {
8337 assert(VA.isMemLoc() && "Argument not register or memory");
8338 assert(!IsTailCall && "Tail call not allowed if stack is used "
8339 "for passing parameters");
8340
8341 // Work out the address of the stack slot.
8342 if (!StackPtr.getNode())
8343 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8345 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8347
8348 // Emit the store.
8349 MemOpChains.push_back(
8350 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8351 }
8352 }
8353
8354 // Join the stores, which are independent of one another.
8355 if (!MemOpChains.empty())
8356 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8357
8358 SDValue Glue;
8359
8360 // Build a sequence of copy-to-reg nodes, chained and glued together.
8361 for (auto &Reg : RegsToPass) {
8362 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8363 Glue = Chain.getValue(1);
8364 }
8365
8366 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8367 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8368 // split it, and the direct call can then be matched by PseudoCALL.
8370 const GlobalValue *GV = S->getGlobal();
8371 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8374 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8375 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8376 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8379 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8380 }
8381
8382 // The first call operand is the chain and the second is the target address.
8384 Ops.push_back(Chain);
8385 Ops.push_back(Callee);
8386
8387 // Add argument registers to the end of the list so that they are
8388 // known live into the call.
8389 for (auto &Reg : RegsToPass)
8390 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8391
8392 if (!IsTailCall) {
8393 // Add a register mask operand representing the call-preserved registers.
8394 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8395 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8396 assert(Mask && "Missing call preserved mask for calling convention");
8397 Ops.push_back(DAG.getRegisterMask(Mask));
8398 }
8399
8400 // Glue the call to the argument copies, if any.
8401 if (Glue.getNode())
8402 Ops.push_back(Glue);
8403
8404 // Emit the call.
8405 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8406 unsigned Op;
8407 switch (DAG.getTarget().getCodeModel()) {
8408 default:
8409 report_fatal_error("Unsupported code model");
8410 case CodeModel::Small:
8411 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8412 break;
8413 case CodeModel::Medium:
8414 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8416 break;
8417 case CodeModel::Large:
8418 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8420 break;
8421 }
8422
8423 if (IsTailCall) {
8425 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8426 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8427 return Ret;
8428 }
8429
8430 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8431 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8432 Glue = Chain.getValue(1);
8433
8434 // Mark the end of the call, which is glued to the call itself.
8435 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8436 Glue = Chain.getValue(1);
8437
8438 // Assign locations to each value returned by this call.
8440 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8441 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8442
8443 // Copy all of the result registers out of their specified physreg.
8444 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8445 auto &VA = RVLocs[i];
8446 // Copy the value out.
8447 SDValue RetValue =
8448 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8449 // Glue the RetValue to the end of the call sequence.
8450 Chain = RetValue.getValue(1);
8451 Glue = RetValue.getValue(2);
8452
8453 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8454 assert(VA.needsCustom());
8455 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8456 MVT::i32, Glue);
8457 Chain = RetValue2.getValue(1);
8458 Glue = RetValue2.getValue(2);
8459 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8460 RetValue, RetValue2);
8461 } else
8462 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8463
8464 InVals.push_back(RetValue);
8465 }
8466
8467 return Chain;
8468}
8469
8471 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8472 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8473 const Type *RetTy) const {
8475 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8476
8477 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8478 LoongArchABI::ABI ABI =
8479 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8480 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8481 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8482 return false;
8483 }
8484 return true;
8485}
8486
8488 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8490 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8491 SelectionDAG &DAG) const {
8492 // Stores the assignment of the return value to a location.
8494
8495 // Info about the registers and stack slot.
8496 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8497 *DAG.getContext());
8498
8499 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8500 nullptr, CC_LoongArch);
8501 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8502 report_fatal_error("GHC functions return void only");
8503 SDValue Glue;
8504 SmallVector<SDValue, 4> RetOps(1, Chain);
8505
8506 // Copy the result values into the output registers.
8507 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8508 SDValue Val = OutVals[OutIdx];
8509 CCValAssign &VA = RVLocs[i];
8510 assert(VA.isRegLoc() && "Can only return in registers!");
8511
8512 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8513 // Handle returning f64 on LA32D with a soft float ABI.
8514 assert(VA.isRegLoc() && "Expected return via registers");
8515 assert(VA.needsCustom());
8517 DAG.getVTList(MVT::i32, MVT::i32), Val);
8518 SDValue Lo = SplitF64.getValue(0);
8519 SDValue Hi = SplitF64.getValue(1);
8520 Register RegLo = VA.getLocReg();
8521 Register RegHi = RVLocs[++i].getLocReg();
8522
8523 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8524 Glue = Chain.getValue(1);
8525 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8526 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8527 Glue = Chain.getValue(1);
8528 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8529 } else {
8530 // Handle a 'normal' return.
8531 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8532 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8533
8534 // Guarantee that all emitted copies are stuck together.
8535 Glue = Chain.getValue(1);
8536 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8537 }
8538 }
8539
8540 RetOps[0] = Chain; // Update chain.
8541
8542 // Add the glue node if we have it.
8543 if (Glue.getNode())
8544 RetOps.push_back(Glue);
8545
8546 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8547}
8548
8549// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8550// Note: The following prefixes are excluded:
8551// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8552// as they can be represented using [x]vrepli.[whb]
8554 const APInt &SplatValue, const unsigned SplatBitSize) const {
8555 uint64_t RequiredImm = 0;
8556 uint64_t V = SplatValue.getZExtValue();
8557 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8558 // 4'b0101
8559 RequiredImm = (0b10101 << 8) | (V >> 8);
8560 return {true, RequiredImm};
8561 } else if (SplatBitSize == 32) {
8562 // 4'b0001
8563 if (!(V & 0xFFFF00FF)) {
8564 RequiredImm = (0b10001 << 8) | (V >> 8);
8565 return {true, RequiredImm};
8566 }
8567 // 4'b0010
8568 if (!(V & 0xFF00FFFF)) {
8569 RequiredImm = (0b10010 << 8) | (V >> 16);
8570 return {true, RequiredImm};
8571 }
8572 // 4'b0011
8573 if (!(V & 0x00FFFFFF)) {
8574 RequiredImm = (0b10011 << 8) | (V >> 24);
8575 return {true, RequiredImm};
8576 }
8577 // 4'b0110
8578 if ((V & 0xFFFF00FF) == 0xFF) {
8579 RequiredImm = (0b10110 << 8) | (V >> 8);
8580 return {true, RequiredImm};
8581 }
8582 // 4'b0111
8583 if ((V & 0xFF00FFFF) == 0xFFFF) {
8584 RequiredImm = (0b10111 << 8) | (V >> 16);
8585 return {true, RequiredImm};
8586 }
8587 // 4'b1010
8588 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8589 RequiredImm =
8590 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8591 return {true, RequiredImm};
8592 }
8593 } else if (SplatBitSize == 64) {
8594 // 4'b1011
8595 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8596 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8597 RequiredImm =
8598 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8599 return {true, RequiredImm};
8600 }
8601 // 4'b1100
8602 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8603 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8604 RequiredImm =
8605 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8606 return {true, RequiredImm};
8607 }
8608 // 4'b1001
8609 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8610 uint8_t res = 0;
8611 for (int i = 0; i < 8; ++i) {
8612 uint8_t byte = x & 0xFF;
8613 if (byte == 0 || byte == 0xFF)
8614 res |= ((byte & 1) << i);
8615 else
8616 return {false, 0};
8617 x >>= 8;
8618 }
8619 return {true, res};
8620 };
8621 auto [IsSame, Suffix] = sameBitsPreByte(V);
8622 if (IsSame) {
8623 RequiredImm = (0b11001 << 8) | Suffix;
8624 return {true, RequiredImm};
8625 }
8626 }
8627 return {false, RequiredImm};
8628}
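// Worked example (illustrative): a v8i16 splat of 0x1200 has SplatBitSize == 16
// and a zero low byte, so the 16-bit case above applies and the returned
// immediate is (0b10101 << 8) | 0x12 == 0x1512.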
8629
8631 EVT VT) const {
8632 if (!Subtarget.hasExtLSX())
8633 return false;
8634
8635 if (VT == MVT::f32) {
8636 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8637 return (masked == 0x3e000000 || masked == 0x40000000);
8638 }
8639
8640 if (VT == MVT::f64) {
8641 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8642 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8643 }
8644
8645 return false;
8646}
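// For instance (illustrative values): for f32, 2.0f (0x40000000) and -0.5f
// (0xBF000000) both satisfy the masked comparison above and can therefore be
// materialized with vldi, while an arbitrary constant such as 3.14f cannot.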
8647
8648bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8649 bool ForCodeSize) const {
8650 // TODO: Maybe need more checks here after vector extension is supported.
8651 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8652 return false;
8653 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8654 return false;
8655 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8656}
8657
8659 return true;
8660}
8661
8663 return true;
8664}
8665
8666bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8667 const Instruction *I) const {
8668 if (!Subtarget.is64Bit())
8669 return isa<LoadInst>(I) || isa<StoreInst>(I);
8670
8671 if (isa<LoadInst>(I))
8672 return true;
8673
8674 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8675 // require fences because we can use amswap_db.[w/d].
8676 Type *Ty = I->getOperand(0)->getType();
8677 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8678 unsigned Size = Ty->getIntegerBitWidth();
8679 return (Size == 8 || Size == 16);
8680 }
8681
8682 return false;
8683}
8684
8686 LLVMContext &Context,
8687 EVT VT) const {
8688 if (!VT.isVector())
8689 return getPointerTy(DL);
8691}
8692
8694 EVT VT = Y.getValueType();
8695
8696 if (VT.isVector())
8697 return Subtarget.hasExtLSX() && VT.isInteger();
8698
8699 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
8700}
8701
8703 const CallInst &I,
8704 MachineFunction &MF,
8705 unsigned Intrinsic) const {
8706 switch (Intrinsic) {
8707 default:
8708 return false;
8709 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8710 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8711 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8712 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8713 Info.opc = ISD::INTRINSIC_W_CHAIN;
8714 Info.memVT = MVT::i32;
8715 Info.ptrVal = I.getArgOperand(0);
8716 Info.offset = 0;
8717 Info.align = Align(4);
8720 return true;
8721 // TODO: Add more Intrinsics later.
8722 }
8723}
8724
8725 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
8726 // atomicrmw and/or/xor operations with operands narrower than 32 bits cannot be
8727// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
8728// regression, we need to implement it manually.
8731
8733 Op == AtomicRMWInst::And) &&
8734 "Unable to expand");
8735 unsigned MinWordSize = 4;
8736
8737 IRBuilder<> Builder(AI);
8738 LLVMContext &Ctx = Builder.getContext();
8739 const DataLayout &DL = AI->getDataLayout();
8740 Type *ValueType = AI->getType();
8741 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8742
8743 Value *Addr = AI->getPointerOperand();
8744 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8745 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8746
8747 Value *AlignedAddr = Builder.CreateIntrinsic(
8748 Intrinsic::ptrmask, {PtrTy, IntTy},
8749 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8750 "AlignedAddr");
8751
8752 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8753 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8754 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8755 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8756 Value *Mask = Builder.CreateShl(
8757 ConstantInt::get(WordType,
8758 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8759 ShiftAmt, "Mask");
8760 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8761 Value *ValOperand_Shifted =
8762 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8763 ShiftAmt, "ValOperand_Shifted");
8764 Value *NewOperand;
8765 if (Op == AtomicRMWInst::And)
8766 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8767 else
8768 NewOperand = ValOperand_Shifted;
8769
8770 AtomicRMWInst *NewAI =
8771 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8772 AI->getOrdering(), AI->getSyncScopeID());
8773
8774 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8775 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8776 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8777 AI->replaceAllUsesWith(FinalOldResult);
8778 AI->eraseFromParent();
8779}
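// Sketch of the resulting transformation (illustrative): an `atomicrmw or i8`
// at address A becomes an i32 `atomicrmw or` on the aligned word (A & ~3) with
// the i8 operand shifted to its byte position; for And, the inverted mask is
// OR'ed into the operand first so the bytes outside the accessed one are left
// unchanged. The original result is recovered by shifting right and
// truncating, as done above.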
8780
8783 // TODO: Add more AtomicRMWInst that needs to be extended.
8784
8785 // Since a floating-point operation requires a non-trivial set of data
8786 // operations, use CmpXChg to expand it.
8787 if (AI->isFloatingPointOperation() ||
8793
8794 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8797 AI->getOperation() == AtomicRMWInst::Sub)) {
8799 }
8800
8801 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8802 if (Subtarget.hasLAMCAS()) {
8803 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8807 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8809 }
8810
8811 if (Size == 8 || Size == 16)
8814}
8815
8816static Intrinsic::ID
8818 AtomicRMWInst::BinOp BinOp) {
8819 if (GRLen == 64) {
8820 switch (BinOp) {
8821 default:
8822 llvm_unreachable("Unexpected AtomicRMW BinOp");
8824 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8825 case AtomicRMWInst::Add:
8826 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8827 case AtomicRMWInst::Sub:
8828 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8830 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8832 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8834 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8835 case AtomicRMWInst::Max:
8836 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8837 case AtomicRMWInst::Min:
8838 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8839 // TODO: support other AtomicRMWInst.
8840 }
8841 }
8842
8843 if (GRLen == 32) {
8844 switch (BinOp) {
8845 default:
8846 llvm_unreachable("Unexpected AtomicRMW BinOp");
8848 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8849 case AtomicRMWInst::Add:
8850 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8851 case AtomicRMWInst::Sub:
8852 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8854 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8856 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8858 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8859 case AtomicRMWInst::Max:
8860 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8861 case AtomicRMWInst::Min:
8862 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8863 // TODO: support other AtomicRMWInst.
8864 }
8865 }
8866
8867 llvm_unreachable("Unexpected GRLen\n");
8868}
8869
8872 AtomicCmpXchgInst *CI) const {
8873
8874 if (Subtarget.hasLAMCAS())
8876
8878 if (Size == 8 || Size == 16)
8881}
8882
8884 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8885 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8886 unsigned GRLen = Subtarget.getGRLen();
8887 AtomicOrdering FailOrd = CI->getFailureOrdering();
8888 Value *FailureOrdering =
8889 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8890 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8891 if (GRLen == 64) {
8892 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8893 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8894 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8895 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8896 }
8897 Type *Tys[] = {AlignedAddr->getType()};
8898 Value *Result = Builder.CreateIntrinsic(
8899 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8900 if (GRLen == 64)
8901 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8902 return Result;
8903}
8904
8906 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8907 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8908 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8909 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8910 // mask, as this produces better code than the LL/SC loop emitted by
8911 // int_loongarch_masked_atomicrmw_xchg.
8912 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8915 if (CVal->isZero())
8916 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8917 Builder.CreateNot(Mask, "Inv_Mask"),
8918 AI->getAlign(), Ord);
8919 if (CVal->isMinusOne())
8920 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8921 AI->getAlign(), Ord);
8922 }
8923
8924 unsigned GRLen = Subtarget.getGRLen();
8925 Value *Ordering =
8926 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8927 Type *Tys[] = {AlignedAddr->getType()};
8929 AI->getModule(),
8931
8932 if (GRLen == 64) {
8933 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8934 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8935 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8936 }
8937
8938 Value *Result;
8939
8940 // Must pass the shift amount needed to sign extend the loaded value prior
8941 // to performing a signed comparison for min/max. ShiftAmt is the number of
8942 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8943 // is the number of bits to left+right shift the value in order to
8944 // sign-extend.
8945 if (AI->getOperation() == AtomicRMWInst::Min ||
8947 const DataLayout &DL = AI->getDataLayout();
8948 unsigned ValWidth =
8949 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8950 Value *SextShamt =
8951 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8952 Result = Builder.CreateCall(LlwOpScwLoop,
8953 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8954 } else {
8955 Result =
8956 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8957 }
8958
8959 if (GRLen == 64)
8960 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8961 return Result;
8962}
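// Example of the sign-extension shift (illustrative): for an i8 min/max at
// byte offset 1 on LA64 (GRLen == 64, ValWidth == 8, ShiftAmt == 8), SextShamt
// is (64 - 8) - 8 == 48, i.e. the loaded byte is shifted left and then
// arithmetically right by 48 bits before the signed comparison.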
8963
8965 const MachineFunction &MF, EVT VT) const {
8966 VT = VT.getScalarType();
8967
8968 if (!VT.isSimple())
8969 return false;
8970
8971 switch (VT.getSimpleVT().SimpleTy) {
8972 case MVT::f32:
8973 case MVT::f64:
8974 return true;
8975 default:
8976 break;
8977 }
8978
8979 return false;
8980}
8981
8983 const Constant *PersonalityFn) const {
8984 return LoongArch::R4;
8985}
8986
8988 const Constant *PersonalityFn) const {
8989 return LoongArch::R5;
8990}
8991
8992//===----------------------------------------------------------------------===//
8993// Target Optimization Hooks
8994//===----------------------------------------------------------------------===//
8995
8997 const LoongArchSubtarget &Subtarget) {
8998 // FRECIPE instructions have a relative accuracy of 2^-14, and each refinement step roughly doubles the accurate bits,
8999 // so one step covers the 23-bit f32 significand and two steps cover the 52-bit f64 significand.
9000 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9001 return RefinementSteps;
9002}
9003
9005 SelectionDAG &DAG, int Enabled,
9006 int &RefinementSteps,
9007 bool &UseOneConstNR,
9008 bool Reciprocal) const {
9009 if (Subtarget.hasFrecipe()) {
9010 SDLoc DL(Operand);
9011 EVT VT = Operand.getValueType();
9012
9013 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9014 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9015 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9016 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9017 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9018
9019 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9020 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9021
9022 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9023 if (Reciprocal)
9024 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9025
9026 return Estimate;
9027 }
9028 }
9029
9030 return SDValue();
9031}
9032
9034 SelectionDAG &DAG,
9035 int Enabled,
9036 int &RefinementSteps) const {
9037 if (Subtarget.hasFrecipe()) {
9038 SDLoc DL(Operand);
9039 EVT VT = Operand.getValueType();
9040
9041 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9042 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9043 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9044 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9045 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9046
9047 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9048 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9049
9050 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9051 }
9052 }
9053
9054 return SDValue();
9055}
9056
9057//===----------------------------------------------------------------------===//
9058// LoongArch Inline Assembly Support
9059//===----------------------------------------------------------------------===//
9060
9062LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9063 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9064 //
9065 // 'f': A floating-point register (if available).
9066 // 'k': A memory operand whose address is formed by a base register and
9067 // (optionally scaled) index register.
9068 // 'l': A signed 16-bit constant.
9069 // 'm': A memory operand whose address is formed by a base register and
9070 // offset that is suitable for use in instructions with the same
9071 // addressing mode as st.w and ld.w.
9072 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9073 // instruction)
9074 // 'I': A signed 12-bit constant (for arithmetic instructions).
9075 // 'J': Integer zero.
9076 // 'K': An unsigned 12-bit constant (for logic instructions).
9077 // "ZB": An address that is held in a general-purpose register. The offset is
9078 // zero.
9079 // "ZC": A memory operand whose address is formed by a base register and
9080 // offset that is suitable for use in instructions with the same
9081 // addressing mode as ll.w and sc.w.
9082 if (Constraint.size() == 1) {
9083 switch (Constraint[0]) {
9084 default:
9085 break;
9086 case 'f':
9087 case 'q':
9088 return C_RegisterClass;
9089 case 'l':
9090 case 'I':
9091 case 'J':
9092 case 'K':
9093 return C_Immediate;
9094 case 'k':
9095 return C_Memory;
9096 }
9097 }
9098
9099 if (Constraint == "ZC" || Constraint == "ZB")
9100 return C_Memory;
9101
9102 // 'm' is handled here.
9103 return TargetLowering::getConstraintType(Constraint);
9104}
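// Hypothetical usage of these constraints in inline assembly (not taken from
// this file):
//   asm volatile("addi.w %0, %1, %2" : "=r"(dst) : "r"(src), "I"(2047));
// Here 'I' is classified as C_Immediate and must fit in a signed 12-bit field,
// while 'r' falls through to the generic register-class handling.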
9105
9106InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9107 StringRef ConstraintCode) const {
9108 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9112 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9113}
9114
9115std::pair<unsigned, const TargetRegisterClass *>
9116LoongArchTargetLowering::getRegForInlineAsmConstraint(
9117 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9118 // First, see if this is a constraint that directly corresponds to a LoongArch
9119 // register class.
9120 if (Constraint.size() == 1) {
9121 switch (Constraint[0]) {
9122 case 'r':
9123 // TODO: Support fixed vectors up to GRLen?
9124 if (VT.isVector())
9125 break;
9126 return std::make_pair(0U, &LoongArch::GPRRegClass);
9127 case 'q':
9128 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9129 case 'f':
9130 if (Subtarget.hasBasicF() && VT == MVT::f32)
9131 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9132 if (Subtarget.hasBasicD() && VT == MVT::f64)
9133 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9134 if (Subtarget.hasExtLSX() &&
9135 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9136 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9137 if (Subtarget.hasExtLASX() &&
9138 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9139 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9140 break;
9141 default:
9142 break;
9143 }
9144 }
9145
9146 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9147 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9148 // constraints while the official register name is prefixed with a '$'. So we
9149 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9150 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
9151 // case insensitive, so no need to convert the constraint to upper case here.
9152 //
9153 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9154 // decode the usage of register name aliases into their official names. And
9155 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9156 // official register names.
9157 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9158 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9159 bool IsFP = Constraint[2] == 'f';
9160 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9161 std::pair<unsigned, const TargetRegisterClass *> R;
9163 TRI, join_items("", Temp.first, Temp.second), VT);
9164 // Match those names to the widest floating point register type available.
9165 if (IsFP) {
9166 unsigned RegNo = R.first;
9167 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9168 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9169 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9170 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9171 }
9172 }
9173 }
9174 return R;
9175 }
9176
9177 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9178}
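// For example (illustrative): an explicit register operand written as "{$f0}"
// in f64 code is first matched by the generic lookup and then remapped above
// to the 64-bit register F0_64 in FPR64RegClass when the basic D feature is
// available.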
9179
9180void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9181 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9182 SelectionDAG &DAG) const {
9183 // Currently only support length 1 constraints.
9184 if (Constraint.size() == 1) {
9185 switch (Constraint[0]) {
9186 case 'l':
9187 // Validate & create a 16-bit signed immediate operand.
9188 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9189 uint64_t CVal = C->getSExtValue();
9190 if (isInt<16>(CVal))
9191 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9192 Subtarget.getGRLenVT()));
9193 }
9194 return;
9195 case 'I':
9196 // Validate & create a 12-bit signed immediate operand.
9197 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9198 uint64_t CVal = C->getSExtValue();
9199 if (isInt<12>(CVal))
9200 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9201 Subtarget.getGRLenVT()));
9202 }
9203 return;
9204 case 'J':
9205 // Validate & create an integer zero operand.
9206 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9207 if (C->getZExtValue() == 0)
9208 Ops.push_back(
9209 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9210 return;
9211 case 'K':
9212 // Validate & create a 12-bit unsigned immediate operand.
9213 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9214 uint64_t CVal = C->getZExtValue();
9215 if (isUInt<12>(CVal))
9216 Ops.push_back(
9217 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9218 }
9219 return;
9220 default:
9221 break;
9222 }
9223 }
9225}
9226
9227#define GET_REGISTER_MATCHER
9228#include "LoongArchGenAsmMatcher.inc"
9229
9232 const MachineFunction &MF) const {
9233 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9234 std::string NewRegName = Name.second.str();
9235 Register Reg = MatchRegisterAltName(NewRegName);
9236 if (!Reg)
9237 Reg = MatchRegisterName(NewRegName);
9238 if (!Reg)
9239 return Reg;
9240 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9241 if (!ReservedRegs.test(Reg))
9242 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9243 StringRef(RegName) + "\"."));
9244 return Reg;
9245}
9246
9248 EVT VT, SDValue C) const {
9249 // TODO: Support vectors.
9250 if (!VT.isScalarInteger())
9251 return false;
9252
9253 // Omit the optimization if the data size exceeds GRLen.
9254 if (VT.getSizeInBits() > Subtarget.getGRLen())
9255 return false;
9256
9257 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9258 const APInt &Imm = ConstNode->getAPIntValue();
9259 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9260 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9261 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9262 return true;
9263 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9264 if (ConstNode->hasOneUse() &&
9265 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9266 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9267 return true;
9268 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9269 // in which the immediate has two set bits, or break (MUL x, imm)
9270 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9271 // equals (1 << s0) - (1 << s1).
9272 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9273 unsigned Shifts = Imm.countr_zero();
9274 // Reject immediates which can be composed via a single LUI.
9275 if (Shifts >= 12)
9276 return false;
9277 // Reject multiplications that can be optimized to
9278 // (SLLI (ALSL x, x, 1/2/3/4), s).
9279 APInt ImmPop = Imm.ashr(Shifts);
9280 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9281 return false;
9282 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9283 // since it needs one more instruction than the other 3 cases.
9284 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9285 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9286 (ImmSmall - Imm).isPowerOf2())
9287 return true;
9288 }
9289 }
9290
9291 return false;
9292}
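// Illustrative decompositions accepted above: x * 7 is caught by the (Imm + 1)
// check (7 == 8 - 1, i.e. SLLI + SUB), x * 9 by the (Imm - 1) check (a single
// ALSL), and x * 4128 (0x1020) by the two-set-bits case, since
// 4128 == (1 << 12) + (1 << 5) and that case requires a single use.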
9293
9295 const AddrMode &AM,
9296 Type *Ty, unsigned AS,
9297 Instruction *I) const {
9298 // LoongArch has four basic addressing modes:
9299 // 1. reg
9300 // 2. reg + 12-bit signed offset
9301 // 3. reg + 14-bit signed offset left-shifted by 2
9302 // 4. reg1 + reg2
9303 // TODO: Add more checks after support vector extension.
9304
9305 // No global is ever allowed as a base.
9306 if (AM.BaseGV)
9307 return false;
9308
9309 // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted by 2
9310 // when the `UAL` feature is available.
9311 if (!isInt<12>(AM.BaseOffs) &&
9312 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9313 return false;
9314
9315 switch (AM.Scale) {
9316 case 0:
9317 // "r+i" or just "i", depending on HasBaseReg.
9318 break;
9319 case 1:
9320 // "r+r+i" is not allowed.
9321 if (AM.HasBaseReg && AM.BaseOffs)
9322 return false;
9323 // Otherwise we have "r+r" or "r+i".
9324 break;
9325 case 2:
9326 // "2*r+r" or "2*r+i" is not allowed.
9327 if (AM.HasBaseReg || AM.BaseOffs)
9328 return false;
9329 // Allow "2*r" as "r+r".
9330 break;
9331 default:
9332 return false;
9333 }
9334
9335 return true;
9336}
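// Illustrative outcomes of these rules: `base + 2040` is accepted as mode 2,
// `base + 16384` is accepted only with the UAL feature (as a 14-bit offset
// left-shifted by 2), `base1 + base2` is accepted as mode 4, and
// `base1 + base2 + 8` is rejected because "r+r+i" is not a supported mode.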
9337
9339 return isInt<12>(Imm);
9340}
9341
9343 return isInt<12>(Imm);
9344}
9345
9347 // Zexts are free if they can be combined with a load.
9348 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9349 // poorly with type legalization of compares preferring sext.
9350 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9351 EVT MemVT = LD->getMemoryVT();
9352 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9353 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9354 LD->getExtensionType() == ISD::ZEXTLOAD))
9355 return true;
9356 }
9357
9358 return TargetLowering::isZExtFree(Val, VT2);
9359}
9360
9362 EVT DstVT) const {
9363 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9364}
9365
9367 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9368}
9369
9371 // TODO: Support vectors.
9372 if (Y.getValueType().isVector())
9373 return false;
9374
9375 return !isa<ConstantSDNode>(Y);
9376}
9377
9379 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9380 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9381}
9382
9384 Type *Ty, bool IsSigned) const {
9385 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9386 return true;
9387
9388 return IsSigned;
9389}
9390
9392 // Return false to suppress the unnecessary extensions if the LibCall
9393 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9394 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9395 Type.getSizeInBits() < Subtarget.getGRLen()))
9396 return false;
9397 return true;
9398}
9399
9400 // memcpy and other memory intrinsics typically try to use wider loads/stores
9401 // if the source/dest is aligned and the copy size is large enough. We therefore
9402// want to align such objects passed to memory intrinsics.
9404 unsigned &MinSize,
9405 Align &PrefAlign) const {
9406 if (!isa<MemIntrinsic>(CI))
9407 return false;
9408
9409 if (Subtarget.is64Bit()) {
9410 MinSize = 8;
9411 PrefAlign = Align(8);
9412 } else {
9413 MinSize = 4;
9414 PrefAlign = Align(4);
9415 }
9416
9417 return true;
9418}
9419
9428
9429bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9430 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9431 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9432 bool IsABIRegCopy = CC.has_value();
9433 EVT ValueVT = Val.getValueType();
9434
9435 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9436 PartVT == MVT::f32) {
9437 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9438 // NaN, and cast to f32.
9439 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9440 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9441 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9442 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9443 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9444 Parts[0] = Val;
9445 return true;
9446 }
9447
9448 return false;
9449}
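// Worked example (illustrative): the half value 1.0 has the bit pattern
// 0x3C00; it is passed as the f32 part with bit pattern 0xFFFF3C00 (a NaN-boxed
// value), and joinRegisterPartsIntoValue below bitcasts back to i32, truncates
// to the low 16 bits (0x3C00), and bitcasts to the [b]f16 result.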
9450
9451SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9452 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9453 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9454 bool IsABIRegCopy = CC.has_value();
9455
9456 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9457 PartVT == MVT::f32) {
9458 SDValue Val = Parts[0];
9459
9460 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9461 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9462 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9463 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9464 return Val;
9465 }
9466
9467 return SDValue();
9468}
9469
9470MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9471 CallingConv::ID CC,
9472 EVT VT) const {
9473 // Use f32 to pass f16.
9474 if (VT == MVT::f16 && Subtarget.hasBasicF())
9475 return MVT::f32;
9476
9478}
9479
9480unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9481 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9482 // Use f32 to pass f16.
9483 if (VT == MVT::f16 && Subtarget.hasBasicF())
9484 return 1;
9485
9487}
9488
9490 SDValue Op, const APInt &OriginalDemandedBits,
9491 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9492 unsigned Depth) const {
9493 EVT VT = Op.getValueType();
9494 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9495 unsigned Opc = Op.getOpcode();
9496 switch (Opc) {
9497 default:
9498 break;
9501 SDValue Src = Op.getOperand(0);
9502 MVT SrcVT = Src.getSimpleValueType();
9503 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9504 unsigned NumElts = SrcVT.getVectorNumElements();
9505
9506 // If we don't need the sign bits at all just return zero.
9507 if (OriginalDemandedBits.countr_zero() >= NumElts)
9508 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9509
9510 // Only demand the vector elements of the sign bits we need.
9511 APInt KnownUndef, KnownZero;
9512 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9513 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9514 TLO, Depth + 1))
9515 return true;
9516
9517 Known.Zero = KnownZero.zext(BitWidth);
9518 Known.Zero.setHighBits(BitWidth - NumElts);
9519
9520 // [X]VMSKLTZ only uses the MSB from each vector element.
9521 KnownBits KnownSrc;
9522 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9523 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9524 Depth + 1))
9525 return true;
9526
9527 if (KnownSrc.One[SrcBits - 1])
9528 Known.One.setLowBits(NumElts);
9529 else if (KnownSrc.Zero[SrcBits - 1])
9530 Known.Zero.setLowBits(NumElts);
9531
9532 // Attempt to avoid multi-use ops if we don't need anything from it.
9534 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9535 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9536 return false;
9537 }
9538 }
9539
9541 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9542}
9543
9545 unsigned Opc = VecOp.getOpcode();
9546
9547 // Assume target opcodes can't be scalarized.
9548 // TODO - do we have any exceptions?
9549 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9550 return false;
9551
9552 // If the vector op is not supported, try to convert to scalar.
9553 EVT VecVT = VecOp.getValueType();
9555 return true;
9556
9557 // If the vector op is supported, but the scalar op is not, the transform may
9558 // not be worthwhile.
9559 EVT ScalarVT = VecVT.getScalarType();
9560 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9561}
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match a vector shuffle as a byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
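As a reading aid, here is a minimal, self-contained sketch of the contract described above, assuming the usual convention that -1 marks an undef mask element (the Sketch-suffixed name is hypothetical, not the in-tree body): every defined element starting at Pos must equal Low, Low + Step, Low + 2*Step, and so on. For example, the mask {4, -1, 6, 7} satisfies the check for Pos = 0, Size = 4, Low = 4.
#include "llvm/ADT/ArrayRef.h"
static bool isSequentialOrUndefInRangeSketch(llvm::ArrayRef<int> Mask,
                                             unsigned Pos, unsigned Size,
                                             int Low, int Step = 1) {
  for (unsigned I = Pos, E = Pos + Size; I != E; ++I, Low += Step)
    if (Mask[I] != -1 && Mask[I] != Low)
      return false; // A defined element breaks the expected sequence.
  return true;
}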
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1432
bool isZero() const
Definition APFloat.h:1445
APInt bitcastToAPInt() const
Definition APFloat.h:1353
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
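The APInt helpers listed above show up throughout the demanded-bits and shuffle combines on this page. A small, self-contained sketch of how a few of them compose (nothing LoongArch-specific is assumed):
#include "llvm/ADT/APInt.h"
#include <cassert>
void apintSketch() {
  using llvm::APInt;
  // Build a 16-bit value with the low 4 bits set, then zero-extend it to 32 bits.
  APInt Mask = APInt::getZero(16);
  Mask.setLowBits(4);                            // 0x000F
  assert(Mask.zext(32).getZExtValue() == 0xF);
  // The set bits start at bit 0, and shifting them out leaves zero.
  assert(Mask.countr_zero() == 0);
  assert(Mask.lshr(4).isZero());
  // getAllOnes / isAllOnes round-trip.
  assert(APInt::getAllOnes(8).isAllOnes());
}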
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
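For context, each BinOp value above corresponds directly to an IR-level atomicrmw instruction, and the result of the instruction is the old contents of the memory location. A minimal sketch of creating one through IRBuilder (the function name is a placeholder, and the builder is assumed to already point into a basic block):
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
// Emits "atomicrmw add ptr %P, i64 %V seq_cst"; the returned value is *P before the add.
llvm::Value *emitAtomicAdd(llvm::IRBuilder<> &Builder, llvm::Value *P,
                           llvm::Value *V) {
  return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Add, P, V,
                                 llvm::MaybeAlign(8),
                                 llvm::AtomicOrdering::SequentiallyConsistent);
}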
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
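For orientation, the CCState/CCValAssign machinery above is used in a standard pattern by the argument-lowering hooks on this page: run a CCAssignFn over the arguments, then walk the resulting locations. A schematic, hedged outline of that loop (register/stack handling reduced to comments; not this target's exact code):
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
static void walkFormalArgs(llvm::MachineFunction &MF, llvm::CallingConv::ID CC,
                           bool IsVarArg,
                           const llvm::SmallVectorImpl<llvm::ISD::InputArg> &Ins,
                           llvm::CCAssignFn Fn) {
  llvm::SmallVector<llvm::CCValAssign, 16> ArgLocs;
  llvm::CCState CCInfo(CC, IsVarArg, MF, ArgLocs, MF.getFunction().getContext());
  CCInfo.AnalyzeFormalArguments(Ins, Fn);
  for (llvm::CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // Copy from the physical register VA.getLocReg() into a virtual register.
      (void)VA.getLocReg();
    } else {
      // Load from the stack slot at VA.getLocMemOffset().
      (void)VA.getLocMemOffset();
    }
  }
}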
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
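A tiny sketch composing a few of the MVT queries above (the header path matches recent LLVM; older releases exposed MVT from llvm/Support/MachineValueType.h):
#include "llvm/CodeGenTypes/MachineValueType.h"
#include <cassert>
void mvtSketch() {
  using llvm::MVT;
  MVT V = MVT::getVectorVT(MVT::i32, 4); // v4i32, a 128-bit (LSX-sized) type
  assert(V.is128BitVector());
  assert(V.getVectorNumElements() == 4);
  assert(V.getVectorElementType() == MVT::i32);
  assert(V.getScalarSizeInBits() == 32);
  // Halving the element count yields v2i32.
  assert(V.getHalfNumVectorElementsVT() == MVT::v2i32);
}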
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
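The MachineInstrBuilder helpers above are how the custom-inserter and pseudo-expansion routines referenced on this page assemble machine instructions. A generic, hedged sketch of the pattern (the opcode is whatever reg+imm instruction the caller selected; this is not this file's code):
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
// Insert "DstReg = <Opcode> SrcReg, Imm" immediately before MI in MBB.
static void emitRegImmOp(llvm::MachineBasicBlock &MBB, llvm::MachineInstr &MI,
                         const llvm::TargetInstrInfo &TII, unsigned Opcode,
                         llvm::Register DstReg, llvm::Register SrcReg,
                         int64_t Imm) {
  llvm::BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(Opcode), DstReg)
      .addReg(SrcReg)
      .addImm(Imm);
}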
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
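Most of the SelectionDAG entry points above are used in one recurring shape inside the lowering and combine hooks: inspect the incoming node's opcode, operands, and type, then rebuild the computation with getNode/getConstant. A hedged, generic illustration of that shape (the add-to-sub rewrite is purely illustrative, not a combine performed by this file):
#include "llvm/CodeGen/SelectionDAG.h"
static llvm::SDValue rebuildAsSub(llvm::SDValue Op, llvm::SelectionDAG &DAG) {
  using namespace llvm;
  if (Op.getOpcode() != ISD::ADD)
    return SDValue();
  auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return SDValue(); // Only handles (add X, constant).
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  // (add X, C) == (sub X, -C); a real combine would also check profitability.
  SDValue NegC = DAG.getConstant(-C->getAPIntValue(), DL, VT);
  return DAG.getNode(ISD::SUB, DL, VT, Op.getOperand(0), NegC);
}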
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
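ISD::getSetCCInverse and ISD::getSetCCSwappedOperands are the usual way a backend re-expresses a comparison it cannot select directly. A minimal standalone sketch, not code from this file; the i64 operand type is only an example for the inversion query:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;

int main() {
  EVT VT = MVT::i64; // example operand type passed to the inversion query
  // (X > Y) reads as (Y < X) once the operands are swapped.
  assert(ISD::getSetCCSwappedOperands(ISD::SETGT) == ISD::SETLT);
  // !(X > Y) is (X <= Y) for integer comparisons.
  assert(ISD::getSetCCInverse(ISD::SETGT, VT) == ISD::SETLE);
  return 0;
}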
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
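These all-zeros / all-ones predicates are what constant-splat select folds typically key on. A hedged sketch of such a fold (hypothetical helper, not code from this file):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// If the vector condition is a known constant splat, the vselect collapses to
// one of its operands; otherwise return an empty SDValue to signal "no fold".
static SDValue foldConstantCondVSelect(SDValue Cond, SDValue TrueV,
                                       SDValue FalseV) {
  if (ISD::isBuildVectorAllOnes(Cond.getNode()))
    return TrueV;  // every lane is ~0: take the first operand
  if (ISD::isBuildVectorAllZeros(Cond.getNode()))
    return FalseV; // every lane is 0: take the second operand
  return SDValue();
}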
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
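LoongArchMatInt::generateInstSeq returns one entry per machine instruction needed to build the constant in a GPR, so its length doubles as a materialization cost. A minimal assumed-usage sketch; the include path and helper name are illustrative, not taken from this file:

#include "MCTargetDesc/LoongArchMatInt.h" // in-tree relative include (assumed)
#include <cstdint>
using namespace llvm;

// Hypothetical cost query: how many instructions does Imm take to materialize?
static unsigned getIntImmCost(int64_t Imm) {
  return LoongArchMatInt::generateInstSeq(Imm).size();
}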
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
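The RTLIB::get* lookups map a (source type, result type) pair to a runtime-library call, returning UNKNOWN_LIBCALL when no such call exists. A small sketch of the typical query; the header path is an assumption, since the declaration has moved between LLVM releases:

#include "llvm/CodeGen/RuntimeLibcallUtil.h" // assumed location of RTLIB::getFPTOSINT
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Does a libcall exist for converting OpVT to a signed integer RetVT?
static bool hasFPToSIntLibcall(EVT OpVT, EVT RetVT) {
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(OpVT, RetVT);
  return LC != RTLIB::UNKNOWN_LIBCALL; // e.g. f32 -> i64 uses __fixsfdi
}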
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
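llvm::all_of is the range-based form of std::all_of; a common use in lowering code is checking a whole shuffle mask at once. Standalone sketch, using the usual SelectionDAG convention that a negative mask index marks an undef lane:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;

// True when every lane of the mask is undef (encoded as a negative index).
static bool isAllUndefMask(ArrayRef<int> Mask) {
  return all_of(Mask, [](int M) { return M < 0; });
}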
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
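isInt<N> is the usual gate for deciding whether a value fits a signed immediate field; signed 12-bit fields are common in LoongArch encodings. A standalone sketch of the boundary cases:

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::isInt<12>(2047));   // largest value a signed 12-bit field holds
  assert(llvm::isInt<12>(-2048));  // smallest value a signed 12-bit field holds
  assert(!llvm::isInt<12>(2048));  // one past the top: must be materialized
  return 0;
}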
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
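peekThroughBitcasts strips any chain of bitcasts from an SDValue, which pairs naturally with the constant predicates above. A hedged one-liner sketch with a hypothetical helper name:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Is V the constant integer zero, possibly hidden behind bitcasts?
static bool isZeroThroughBitcasts(SDValue V) {
  return isNullConstant(peekThroughBitcasts(V));
}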
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
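A worked example of the widening helper, assuming its usual semantics: each group of Scale consecutive mask indices must name consecutive source lanes starting at a multiple of Scale, otherwise the call fails. Standalone sketch, not taken from this file:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

void widenMaskDemo() {
  SmallVector<int, 8> Wide;
  int Good[] = {0, 1, 6, 7};
  bool Ok = widenShuffleMaskElts(2, Good, Wide);
  // Ok == true, Wide == {0, 3}: lanes {0,1} become wide lane 0, {6,7} become 3.
  int Bad[] = {0, 2, 4, 6};
  bool Failed = !widenShuffleMaskElts(2, Bad, Wide);
  // Failed == true: {0,2} are not consecutive narrow lanes, so no wide mask exists.
  (void)Ok;
  (void)Failed;
}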
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
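Together, isPowerOf2_64 and Log2_64 are the standard strength-reduction check: a multiply by a power of two becomes a shift by its log. A standalone worked example:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  uint64_t C = 64;
  assert(llvm::isPowerOf2_64(C));    // 64 == 1 << 6
  unsigned ShAmt = llvm::Log2_64(C); // floor(log2(64)) == 6
  assert(ShAmt == 6);
  uint64_t X = 5;
  assert(X * C == (X << ShAmt));     // x * 64 == x << 6
  return 0;
}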
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
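isMask_64 and isShiftedMask_64 describe the two mask shapes that bitfield-extract style folds look for: ones starting at bit 0, and one contiguous run of ones anywhere. A standalone worked example:

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::isMask_64(0x00FF));         // ones from bit 0 upward
  assert(!llvm::isMask_64(0x0FF0));        // the run does not start at bit 0 ...
  assert(llvm::isShiftedMask_64(0x0FF0));  // ... but it is one shifted run of ones
  assert(!llvm::isShiftedMask_64(0x0F0F)); // two separate runs: neither predicate holds
  return 0;
}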
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
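isShiftedInt<N, S> checks for an N-bit signed value pre-shifted left by S, the shape of scaled offset fields (LoongArch's ldptr.w/stptr.w, for instance, encode a 14-bit signed immediate scaled by 4). A standalone boundary-case sketch:

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::isShiftedInt<14, 2>(32764));  // 8191 * 4: fits si14 << 2
  assert(!llvm::isShiftedInt<14, 2>(32766)); // not a multiple of 4
  assert(!llvm::isShiftedInt<14, 2>(32768)); // 8192 * 4: 8192 exceeds si14
  return 0;
}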
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueTypes that has been interned by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...