1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/IntrinsicsLoongArch.h"
32#include "llvm/Support/Debug.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "loongarch-isel-lowering"
41
42STATISTIC(NumTailCalls, "Number of tail calls");
43
44static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
45 cl::desc("Trap on integer division by zero."),
46 cl::init(false));
47
48LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
49 const LoongArchSubtarget &STI)
50 : TargetLowering(TM), Subtarget(STI) {
51
52 MVT GRLenVT = Subtarget.getGRLenVT();
53
54 // Set up the register classes.
55
56 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
57 if (Subtarget.hasBasicF())
58 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
59 if (Subtarget.hasBasicD())
60 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
61
62 static const MVT::SimpleValueType LSXVTs[] = {
63 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
64 static const MVT::SimpleValueType LASXVTs[] = {
65 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
66
67 if (Subtarget.hasExtLSX())
68 for (MVT VT : LSXVTs)
69 addRegisterClass(VT, &LoongArch::LSX128RegClass);
70
71 if (Subtarget.hasExtLASX())
72 for (MVT VT : LASXVTs)
73 addRegisterClass(VT, &LoongArch::LASX256RegClass);
74
75 // Set operations for LA32 and LA64.
76
78 MVT::i1, Promote);
79
86
89 GRLenVT, Custom);
90
92
93 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
94 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
95 setOperationAction(ISD::VASTART, MVT::Other, Custom);
96 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
97
98 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
99 setOperationAction(ISD::TRAP, MVT::Other, Legal);
100
104
105 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
106
107 // BITREV/REVB require the 32S feature.
108 if (STI.has32S()) {
109 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
110 // we know which of sll and revb.2h is faster.
113
114 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
115 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
116 // and i32 could still be byte-swapped relatively cheaply.
118 } else {
126 }
127
128 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
129 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
130 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
134
137
138 // Set operations for LA64 only.
139
140 if (Subtarget.is64Bit()) {
147 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
158
162 Custom);
163 setOperationAction(ISD::LROUND, MVT::i32, Custom);
164 }
165
166 // Set operations for LA32 only.
167
168 if (!Subtarget.is64Bit()) {
174 if (Subtarget.hasBasicD())
175 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
176 }
177
178 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
179
180 static const ISD::CondCode FPCCToExpand[] = {
183
184 // Set operations for 'F' feature.
185
186 if (Subtarget.hasBasicF()) {
187 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
190 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
191 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
192
194 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
196 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
197 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
198 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
199 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
204 setOperationAction(ISD::FSIN, MVT::f32, Expand);
205 setOperationAction(ISD::FCOS, MVT::f32, Expand);
206 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
207 setOperationAction(ISD::FPOW, MVT::f32, Expand);
209 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
210 Subtarget.isSoftFPABI() ? LibCall : Custom);
211 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
212 Subtarget.isSoftFPABI() ? LibCall : Custom);
213 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
214 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
215 Subtarget.isSoftFPABI() ? LibCall : Custom);
216
217 if (Subtarget.is64Bit())
218 setOperationAction(ISD::FRINT, MVT::f32, Legal);
219
220 if (!Subtarget.hasBasicD()) {
222 if (Subtarget.is64Bit()) {
225 }
226 }
227 }
228
229 // Set operations for 'D' feature.
230
231 if (Subtarget.hasBasicD()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
235 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
236 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
237 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
238 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
239
241 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
245 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
246 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
247 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
249 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
251 setOperationAction(ISD::FSIN, MVT::f64, Expand);
252 setOperationAction(ISD::FCOS, MVT::f64, Expand);
253 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
254 setOperationAction(ISD::FPOW, MVT::f64, Expand);
256 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
257 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
258 Subtarget.isSoftFPABI() ? LibCall : Custom);
259 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
260 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
261 Subtarget.isSoftFPABI() ? LibCall : Custom);
262
263 if (Subtarget.is64Bit())
264 setOperationAction(ISD::FRINT, MVT::f64, Legal);
265 }
266
267 // Set operations for 'LSX' feature.
268
269 if (Subtarget.hasExtLSX()) {
270 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
271 // Expand all truncating stores and extending loads.
272 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
273 setTruncStoreAction(VT, InnerVT, Expand);
276 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
277 }
278 // By default everything must be expanded. Then we will selectively turn
279 // on ones that can be effectively codegen'd.
280 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
281 setOperationAction(Op, VT, Expand);
282 }
283
284 for (MVT VT : LSXVTs) {
285 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
286 setOperationAction(ISD::BITCAST, VT, Legal);
288
292
297 }
298 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
301 Legal);
303 VT, Legal);
310 Expand);
318 }
319 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
321 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
323 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
326 }
327 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
331 setOperationAction(ISD::FSQRT, VT, Legal);
332 setOperationAction(ISD::FNEG, VT, Legal);
335 VT, Expand);
337 }
339 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
340 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
341 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
342 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
343
344 for (MVT VT :
345 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
346 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
348 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
349 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
350 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
351 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
352 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
353 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
354 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
355 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
356 }
357 }
358
359 // Set operations for 'LASX' feature.
360
361 if (Subtarget.hasExtLASX()) {
362 for (MVT VT : LASXVTs) {
363 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
364 setOperationAction(ISD::BITCAST, VT, Legal);
366
372
376 }
377 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
380 Legal);
382 VT, Legal);
389 Expand);
397 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
398 }
399 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
401 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
403 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
406 }
407 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
411 setOperationAction(ISD::FSQRT, VT, Legal);
412 setOperationAction(ISD::FNEG, VT, Legal);
415 VT, Expand);
417 }
418 }
419
420 // Set DAG combine for LA32 and LA64.
421
426
427 // Set DAG combine for 'LSX' feature.
428
429 if (Subtarget.hasExtLSX()) {
431 setTargetDAGCombine(ISD::BITCAST);
432 }
433
434 // Set DAG combine for 'LASX' feature.
435
436 if (Subtarget.hasExtLASX())
438
439 // Compute derived properties from the register classes.
440 computeRegisterProperties(Subtarget.getRegisterInfo());
441
443
446
447 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
448
450
451 // Function alignments.
453 // Set preferred alignments.
454 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
455 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
456 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
457
458 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
459 if (Subtarget.hasLAMCAS())
460 setMinCmpXchgSizeInBits(8);
461
462 if (Subtarget.hasSCQ()) {
463 setMaxAtomicSizeInBitsSupported(128);
464 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
465 }
466}
467
468bool LoongArchTargetLowering::isOffsetFoldingLegal(
469 const GlobalAddressSDNode *GA) const {
470 // In order to maximise the opportunity for common subexpression elimination,
471 // keep a separate ADD node for the global address offset instead of folding
472// it into the global address node. Later peephole optimisations may choose to
473 // fold it back in when profitable.
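  // Illustrative example (added for clarity, not from the original source):
  // for accesses to g+8 and g+12, keeping (add (globaladdr g), 8) and
  // (add (globaladdr g), 12) lets CSE share the single materialized address
  // of g, whereas folding the offsets would produce two distinct nodes.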
474 return false;
475}
476
477SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
478 SelectionDAG &DAG) const {
479 switch (Op.getOpcode()) {
480 case ISD::ATOMIC_FENCE:
481 return lowerATOMIC_FENCE(Op, DAG);
482 case ISD::EH_DWARF_CFA:
483 return lowerEH_DWARF_CFA(Op, DAG);
484 case ISD::GlobalAddress:
485 return lowerGlobalAddress(Op, DAG);
486 case ISD::GlobalTLSAddress:
487 return lowerGlobalTLSAddress(Op, DAG);
488 case ISD::INTRINSIC_WO_CHAIN:
489 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
490 case ISD::INTRINSIC_W_CHAIN:
491 return lowerINTRINSIC_W_CHAIN(Op, DAG);
492 case ISD::INTRINSIC_VOID:
493 return lowerINTRINSIC_VOID(Op, DAG);
494 case ISD::BlockAddress:
495 return lowerBlockAddress(Op, DAG);
496 case ISD::JumpTable:
497 return lowerJumpTable(Op, DAG);
498 case ISD::SHL_PARTS:
499 return lowerShiftLeftParts(Op, DAG);
500 case ISD::SRA_PARTS:
501 return lowerShiftRightParts(Op, DAG, true);
502 case ISD::SRL_PARTS:
503 return lowerShiftRightParts(Op, DAG, false);
504 case ISD::ConstantPool:
505 return lowerConstantPool(Op, DAG);
506 case ISD::FP_TO_SINT:
507 return lowerFP_TO_SINT(Op, DAG);
508 case ISD::BITCAST:
509 return lowerBITCAST(Op, DAG);
510 case ISD::UINT_TO_FP:
511 return lowerUINT_TO_FP(Op, DAG);
512 case ISD::SINT_TO_FP:
513 return lowerSINT_TO_FP(Op, DAG);
514 case ISD::VASTART:
515 return lowerVASTART(Op, DAG);
516 case ISD::FRAMEADDR:
517 return lowerFRAMEADDR(Op, DAG);
518 case ISD::RETURNADDR:
519 return lowerRETURNADDR(Op, DAG);
520 case ISD::WRITE_REGISTER:
521 return lowerWRITE_REGISTER(Op, DAG);
522 case ISD::INSERT_VECTOR_ELT:
523 return lowerINSERT_VECTOR_ELT(Op, DAG);
524 case ISD::EXTRACT_VECTOR_ELT:
525 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
526 case ISD::BUILD_VECTOR:
527 return lowerBUILD_VECTOR(Op, DAG);
528 case ISD::CONCAT_VECTORS:
529 return lowerCONCAT_VECTORS(Op, DAG);
530 case ISD::VECTOR_SHUFFLE:
531 return lowerVECTOR_SHUFFLE(Op, DAG);
532 case ISD::BITREVERSE:
533 return lowerBITREVERSE(Op, DAG);
534 case ISD::SCALAR_TO_VECTOR:
535 return lowerSCALAR_TO_VECTOR(Op, DAG);
536 case ISD::PREFETCH:
537 return lowerPREFETCH(Op, DAG);
538 case ISD::SELECT:
539 return lowerSELECT(Op, DAG);
540 case ISD::BRCOND:
541 return lowerBRCOND(Op, DAG);
542 case ISD::FP_TO_FP16:
543 return lowerFP_TO_FP16(Op, DAG);
544 case ISD::FP16_TO_FP:
545 return lowerFP16_TO_FP(Op, DAG);
546 case ISD::FP_TO_BF16:
547 return lowerFP_TO_BF16(Op, DAG);
548 case ISD::BF16_TO_FP:
549 return lowerBF16_TO_FP(Op, DAG);
550 case ISD::VECREDUCE_ADD:
551 return lowerVECREDUCE_ADD(Op, DAG);
552 case ISD::VECREDUCE_AND:
553 case ISD::VECREDUCE_OR:
554 case ISD::VECREDUCE_XOR:
555 case ISD::VECREDUCE_SMAX:
556 case ISD::VECREDUCE_SMIN:
557 case ISD::VECREDUCE_UMAX:
558 case ISD::VECREDUCE_UMIN:
559 return lowerVECREDUCE(Op, DAG);
560 }
561 return SDValue();
562}
563
564// Lower vecreduce_add using vhaddw instructions.
565// For Example:
566// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
567// can be lowered to:
568// VHADDW_D_W vr0, vr0, vr0
569// VHADDW_Q_D vr0, vr0, vr0
570// VPICKVE2GR_D a0, vr0, 0
571// ADDI_W a0, a0, 0
572SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
573 SelectionDAG &DAG) const {
574
575 SDLoc DL(Op);
576 MVT OpVT = Op.getSimpleValueType();
577 SDValue Val = Op.getOperand(0);
578
579 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
580 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
581
582 unsigned LegalVecSize = 128;
583 bool isLASX256Vector =
584 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
585
586 // Widen the operand type until it is legal.
587 while (!isTypeLegal(Val.getSimpleValueType())) {
588 Val = DAG.WidenVector(Val, DL);
589 }
590
591 // NumEles is the iteration count; v4i32 for LSX and v8i32 for LASX should
592 // iterate the same number of times.
593 if (isLASX256Vector) {
594 NumEles /= 2;
595 LegalVecSize = 256;
596 }
597
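  // Note (added for clarity): each VHADDW step below sums adjacent element
  // pairs into lanes of twice the width, so after log2(NumEles) iterations the
  // full sum sits in the lowest element, e.g. v4i32 -> v2i64 -> a single
  // 128-bit lane, matching the instruction sequence in the header comment.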
598 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
599 MVT IntTy = MVT::getIntegerVT(EleBits);
600 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
601 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
602 }
603
604 if (isLASX256Vector) {
605 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
606 DAG.getConstant(2, DL, MVT::i64));
607 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
608 }
609
610 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
611 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
612}
613
614// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
615// For Example:
616// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
617// can be lowered to:
618// VBSRL_V vr1, vr0, 8
619// VMAX_W vr0, vr1, vr0
620// VBSRL_V vr1, vr0, 4
621// VMAX_W vr0, vr1, vr0
622// VPICKVE2GR_W a0, vr0, 0
623// A 256-bit vector is illegal here and is split into two 128-bit vectors
624// by default, which are then processed by this function.
625SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
626 SelectionDAG &DAG) const {
627 SDLoc DL(Op);
628
629 MVT OpVT = Op.getSimpleValueType();
630 SDValue Val = Op.getOperand(0);
631
632 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
633 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
634
635 // Widen the operand type until it is legal.
636 while (!isTypeLegal(Val.getSimpleValueType())) {
637 Val = DAG.WidenVector(Val, DL);
638 }
639
640 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
641 MVT VecTy = Val.getSimpleValueType();
642
643 for (int i = NumEles; i > 1; i /= 2) {
644 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
645 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
646 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
647 }
648
649 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
650 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
651}
652
653SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
654 SelectionDAG &DAG) const {
655 unsigned IsData = Op.getConstantOperandVal(4);
656
657 // We don't support non-data prefetch.
658 // Just preserve the chain.
659 if (!IsData)
660 return Op.getOperand(0);
661
662 return Op;
663}
664
665// Return true if Val is equal to (setcc LHS, RHS, CC).
666// Return false if Val is the inverse of (setcc LHS, RHS, CC).
667// Otherwise, return std::nullopt.
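// Illustrative example (added for clarity, not from the original source):
// with Val = (setcc a, b, setlt):
//   matchSetCC(a, b, setlt, Val) == true
//   matchSetCC(a, b, setge, Val) == false   // inverse condition
//   matchSetCC(b, a, setgt, Val) == true    // operands and condition swapped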
668static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
669 ISD::CondCode CC, SDValue Val) {
670 assert(Val->getOpcode() == ISD::SETCC);
671 SDValue LHS2 = Val.getOperand(0);
672 SDValue RHS2 = Val.getOperand(1);
673 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
674
675 if (LHS == LHS2 && RHS == RHS2) {
676 if (CC == CC2)
677 return true;
678 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
679 return false;
680 } else if (LHS == RHS2 && RHS == LHS2) {
681 CC2 = ISD::getSetCCSwappedOperands(CC2);
682 if (CC == CC2)
683 return true;
684 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
685 return false;
686 }
687
688 return std::nullopt;
689}
690
691static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
692 const LoongArchSubtarget &Subtarget) {
693 SDValue CondV = N->getOperand(0);
694 SDValue TrueV = N->getOperand(1);
695 SDValue FalseV = N->getOperand(2);
696 MVT VT = N->getSimpleValueType(0);
697 SDLoc DL(N);
698
699 // (select c, -1, y) -> -c | y
700 if (isAllOnesConstant(TrueV)) {
701 SDValue Neg = DAG.getNegative(CondV, DL, VT);
702 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
703 }
704 // (select c, y, -1) -> (c-1) | y
705 if (isAllOnesConstant(FalseV)) {
706 SDValue Neg =
707 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
708 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
709 }
710
711 // (select c, 0, y) -> (c-1) & y
712 if (isNullConstant(TrueV)) {
713 SDValue Neg =
714 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
715 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
716 }
717 // (select c, y, 0) -> -c & y
718 if (isNullConstant(FalseV)) {
719 SDValue Neg = DAG.getNegative(CondV, DL, VT);
720 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
721 }
722
723 // select c, ~x, x --> xor -c, x
724 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
725 const APInt &TrueVal = TrueV->getAsAPIntVal();
726 const APInt &FalseVal = FalseV->getAsAPIntVal();
727 if (~TrueVal == FalseVal) {
728 SDValue Neg = DAG.getNegative(CondV, DL, VT);
729 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
730 }
731 }
732
733 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
734 // when both truev and falsev are also setcc.
735 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
736 FalseV.getOpcode() == ISD::SETCC) {
737 SDValue LHS = CondV.getOperand(0);
738 SDValue RHS = CondV.getOperand(1);
739 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
740
741 // (select x, x, y) -> x | y
742 // (select !x, x, y) -> x & y
743 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
744 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
745 DAG.getFreeze(FalseV));
746 }
747 // (select x, y, x) -> x & y
748 // (select !x, y, x) -> x | y
749 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
750 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
751 DAG.getFreeze(TrueV), FalseV);
752 }
753 }
754
755 return SDValue();
756}
757
758// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
759// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
760// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
761// being `0` or `-1`. In such cases we can replace `select` with `and`.
762// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
763// than `c0`?
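// Illustrative example (added for clarity, not from the original source):
//   (and (select cond, x, 0), 1) --> (select cond, (and x, 1), 0)
// since and(0, 1) == 0, the resulting select can later be replaced with an
// and, as described above.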
764static SDValue
765foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
766 const LoongArchSubtarget &Subtarget) {
767 unsigned SelOpNo = 0;
768 SDValue Sel = BO->getOperand(0);
769 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
770 SelOpNo = 1;
771 Sel = BO->getOperand(1);
772 }
773
774 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
775 return SDValue();
776
777 unsigned ConstSelOpNo = 1;
778 unsigned OtherSelOpNo = 2;
779 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
780 ConstSelOpNo = 2;
781 OtherSelOpNo = 1;
782 }
783 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
784 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
785 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
786 return SDValue();
787
788 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
789 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
790 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
791 return SDValue();
792
793 SDLoc DL(Sel);
794 EVT VT = BO->getValueType(0);
795
796 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
797 if (SelOpNo == 1)
798 std::swap(NewConstOps[0], NewConstOps[1]);
799
800 SDValue NewConstOp =
801 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
802 if (!NewConstOp)
803 return SDValue();
804
805 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
806 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
807 return SDValue();
808
809 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
810 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
811 if (SelOpNo == 1)
812 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
813 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
814
815 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
816 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
817 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
818}
819
820// Changes the condition code and swaps operands if necessary, so the SetCC
821// operation matches one of the comparisons supported directly by branches
822// in the LoongArch ISA. May adjust compares to favor compare with 0 over
823// compare with 1/-1.
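// Illustrative examples (added for clarity, not from the original source):
//   (setcc x, -1, setgt)  becomes  (setcc x, 0, setge)
//   (setcc x, 1, setlt)   becomes  (setcc 0, x, setge)
//   (setcc x, y, setule)  becomes  (setcc y, x, setuge)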
824static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
825 ISD::CondCode &CC, SelectionDAG &DAG) {
826 // If this is a single bit test that can't be handled by ANDI, shift the
827 // bit to be tested to the MSB and perform a signed compare with 0.
828 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
829 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
830 isa<ConstantSDNode>(LHS.getOperand(1))) {
831 uint64_t Mask = LHS.getConstantOperandVal(1);
832 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
833 unsigned ShAmt = 0;
834 if (isPowerOf2_64(Mask)) {
835 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
836 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
837 } else {
838 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
839 }
840
841 LHS = LHS.getOperand(0);
842 if (ShAmt != 0)
843 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
844 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
845 return;
846 }
847 }
848
849 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
850 int64_t C = RHSC->getSExtValue();
851 switch (CC) {
852 default:
853 break;
854 case ISD::SETGT:
855 // Convert X > -1 to X >= 0.
856 if (C == -1) {
857 RHS = DAG.getConstant(0, DL, RHS.getValueType());
858 CC = ISD::SETGE;
859 return;
860 }
861 break;
862 case ISD::SETLT:
863 // Convert X < 1 to 0 >= X.
864 if (C == 1) {
865 RHS = LHS;
866 LHS = DAG.getConstant(0, DL, RHS.getValueType());
867 CC = ISD::SETGE;
868 return;
869 }
870 break;
871 }
872 }
873
874 switch (CC) {
875 default:
876 break;
877 case ISD::SETGT:
878 case ISD::SETLE:
879 case ISD::SETUGT:
880 case ISD::SETULE:
881 CC = ISD::getSetCCSwappedOperands(CC);
882 std::swap(LHS, RHS);
883 break;
884 }
885}
886
887SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
888 SelectionDAG &DAG) const {
889 SDValue CondV = Op.getOperand(0);
890 SDValue TrueV = Op.getOperand(1);
891 SDValue FalseV = Op.getOperand(2);
892 SDLoc DL(Op);
893 MVT VT = Op.getSimpleValueType();
894 MVT GRLenVT = Subtarget.getGRLenVT();
895
896 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
897 return V;
898
899 if (Op.hasOneUse()) {
900 unsigned UseOpc = Op->user_begin()->getOpcode();
901 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
902 SDNode *BinOp = *Op->user_begin();
903 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
904 DAG, Subtarget)) {
905 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
906 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
907 // may return a constant node and cause a crash in lowerSELECT.
908 if (NewSel.getOpcode() == ISD::SELECT)
909 return lowerSELECT(NewSel, DAG);
910 return NewSel;
911 }
912 }
913 }
914
915 // If the condition is not an integer SETCC which operates on GRLenVT, we need
916 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
917 // (select condv, truev, falsev)
918 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
919 if (CondV.getOpcode() != ISD::SETCC ||
920 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
921 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
922 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
923
924 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
925
926 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
927 }
928
929 // If the CondV is the output of a SETCC node which operates on GRLenVT
930 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
931 // to take advantage of the integer compare+branch instructions. i.e.: (select
932 // (setcc lhs, rhs, cc), truev, falsev)
933 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
934 SDValue LHS = CondV.getOperand(0);
935 SDValue RHS = CondV.getOperand(1);
936 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
937
938 // Special case for a select of 2 constants that have a difference of 1.
939 // Normally this is done by DAGCombine, but if the select is introduced by
940 // type legalization or op legalization, we miss it. Restricting to SETLT
941 // case for now because that is what signed saturating add/sub need.
942 // FIXME: We don't need the condition to be SETLT or even a SETCC,
943 // but we would probably want to swap the true/false values if the condition
944 // is SETGE/SETLE to avoid an XORI.
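  // Illustrative example (added for clarity, not from the original source):
  //   (select (setlt a, b), 5, 4) --> (add (setcc a, b, setlt), 4)
  // since the setcc produces 0 or 1 and TrueVal - 1 == FalseVal.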
945 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
946 CCVal == ISD::SETLT) {
947 const APInt &TrueVal = TrueV->getAsAPIntVal();
948 const APInt &FalseVal = FalseV->getAsAPIntVal();
949 if (TrueVal - 1 == FalseVal)
950 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
951 if (TrueVal + 1 == FalseVal)
952 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
953 }
954
955 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
956 // 1 < x ? x : 1 -> 0 < x ? x : 1
957 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
958 RHS == TrueV && LHS == FalseV) {
959 LHS = DAG.getConstant(0, DL, VT);
960 // 0 <u x is the same as x != 0.
961 if (CCVal == ISD::SETULT) {
962 std::swap(LHS, RHS);
963 CCVal = ISD::SETNE;
964 }
965 }
966
967 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
968 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
969 RHS == FalseV) {
970 RHS = DAG.getConstant(0, DL, VT);
971 }
972
973 SDValue TargetCC = DAG.getCondCode(CCVal);
974
975 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
976 // (select (setcc lhs, rhs, CC), constant, falsev)
977 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
978 std::swap(TrueV, FalseV);
979 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
980 }
981
982 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
983 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
984}
985
986SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
987 SelectionDAG &DAG) const {
988 SDValue CondV = Op.getOperand(1);
989 SDLoc DL(Op);
990 MVT GRLenVT = Subtarget.getGRLenVT();
991
992 if (CondV.getOpcode() == ISD::SETCC) {
993 if (CondV.getOperand(0).getValueType() == GRLenVT) {
994 SDValue LHS = CondV.getOperand(0);
995 SDValue RHS = CondV.getOperand(1);
996 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
997
998 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
999
1000 SDValue TargetCC = DAG.getCondCode(CCVal);
1001 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1002 Op.getOperand(0), LHS, RHS, TargetCC,
1003 Op.getOperand(2));
1004 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1005 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1006 Op.getOperand(0), CondV, Op.getOperand(2));
1007 }
1008 }
1009
1010 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1011 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1012 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1013}
1014
1015SDValue
1016LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1017 SelectionDAG &DAG) const {
1018 SDLoc DL(Op);
1019 MVT OpVT = Op.getSimpleValueType();
1020
1021 SDValue Vector = DAG.getUNDEF(OpVT);
1022 SDValue Val = Op.getOperand(0);
1023 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1024
1025 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1026}
1027
1028SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1029 SelectionDAG &DAG) const {
1030 EVT ResTy = Op->getValueType(0);
1031 SDValue Src = Op->getOperand(0);
1032 SDLoc DL(Op);
1033
1034 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1035 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1036 unsigned int NewEltNum = NewVT.getVectorNumElements();
1037
1038 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1039
1040 SmallVector<SDValue, 8> Ops;
1041 for (unsigned int i = 0; i < NewEltNum; i++) {
1042 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1043 DAG.getConstant(i, DL, MVT::i64));
1044 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1045 ? (unsigned)LoongArchISD::BITREV_8B
1046 : (unsigned)ISD::BITREVERSE;
1047 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1048 }
1049 SDValue Res =
1050 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1051
1052 switch (ResTy.getSimpleVT().SimpleTy) {
1053 default:
1054 return SDValue();
1055 case MVT::v16i8:
1056 case MVT::v32i8:
1057 return Res;
1058 case MVT::v8i16:
1059 case MVT::v16i16:
1060 case MVT::v4i32:
1061 case MVT::v8i32: {
1062 SmallVector<int, 32> Mask;
1063 for (unsigned int i = 0; i < NewEltNum; i++)
1064 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1065 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1066 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1067 }
1068 }
1069}
1070
1071// Widen element type to get a new mask value (if possible).
1072// For example:
1073// shufflevector <4 x i32> %a, <4 x i32> %b,
1074// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1075// is equivalent to:
1076// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1077// can be lowered to:
1078// VPACKOD_D vr0, vr0, vr1
1079static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1080 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1081 unsigned EltBits = VT.getScalarSizeInBits();
1082
1083 if (EltBits > 32 || EltBits == 1)
1084 return SDValue();
1085
1086 SmallVector<int, 8> NewMask;
1087 if (widenShuffleMaskElts(Mask, NewMask)) {
1088 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1089 : MVT::getIntegerVT(EltBits * 2);
1090 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1091 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1092 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1093 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1094 return DAG.getBitcast(
1095 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1096 }
1097 }
1098
1099 return SDValue();
1100}
1101
1102/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1103/// instruction.
1104// The function matches elements from one of the input vectors shuffled to the
1105// left or right with zeroable elements 'shifted in'. It handles both the
1106// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1107// lane.
1108// Mostly copied from X86.
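// Illustrative example (added for clarity, not from the original source): for
// v4i32 with mask <4, 0, 1, 2> and element 4 known zeroable, the 128-bit lane
// is shifted left by one 32-bit element, reported as a 4-byte VBSLL (see the
// examples in lowerVECTOR_SHUFFLEAsShift below).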
1109static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1110 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1111 int MaskOffset, const APInt &Zeroable) {
1112 int Size = Mask.size();
1113 unsigned SizeInBits = Size * ScalarSizeInBits;
1114
1115 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1116 for (int i = 0; i < Size; i += Scale)
1117 for (int j = 0; j < Shift; ++j)
1118 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1119 return false;
1120
1121 return true;
1122 };
1123
1124 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1125 int Step = 1) {
1126 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1127 if (!(Mask[i] == -1 || Mask[i] == Low))
1128 return false;
1129 return true;
1130 };
1131
1132 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1133 for (int i = 0; i != Size; i += Scale) {
1134 unsigned Pos = Left ? i + Shift : i;
1135 unsigned Low = Left ? i : i + Shift;
1136 unsigned Len = Scale - Shift;
1137 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1138 return -1;
1139 }
1140
1141 int ShiftEltBits = ScalarSizeInBits * Scale;
1142 bool ByteShift = ShiftEltBits > 64;
1143 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1144 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1145 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1146
1147 // Normalize the scale for byte shifts to still produce an i64 element
1148 // type.
1149 Scale = ByteShift ? Scale / 2 : Scale;
1150
1151 // We need to round trip through the appropriate type for the shift.
1152 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1153 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1154 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1155 return (int)ShiftAmt;
1156 };
1157
1158 unsigned MaxWidth = 128;
1159 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1160 for (int Shift = 1; Shift != Scale; ++Shift)
1161 for (bool Left : {true, false})
1162 if (CheckZeros(Shift, Scale, Left)) {
1163 int ShiftAmt = MatchShift(Shift, Scale, Left);
1164 if (0 < ShiftAmt)
1165 return ShiftAmt;
1166 }
1167
1168 // no match
1169 return -1;
1170}
1171
1172/// Lower VECTOR_SHUFFLE as shift (if possible).
1173///
1174/// For example:
1175/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1176/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1177/// is lowered to:
1178/// (VBSLL_V $v0, $v0, 4)
1179///
1180/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1181/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1182/// is lowered to:
1183/// (VSLLI_D $v0, $v0, 32)
1184static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
1185 MVT VT, SDValue V1, SDValue V2,
1186 SelectionDAG &DAG,
1187 const LoongArchSubtarget &Subtarget,
1188 const APInt &Zeroable) {
1189 int Size = Mask.size();
1190 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1191
1192 MVT ShiftVT;
1193 SDValue V = V1;
1194 unsigned Opcode;
1195
1196 // Try to match shuffle against V1 shift.
1197 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1198 Mask, 0, Zeroable);
1199
1200 // If V1 failed, try to match shuffle against V2 shift.
1201 if (ShiftAmt < 0) {
1202 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1203 Mask, Size, Zeroable);
1204 V = V2;
1205 }
1206
1207 if (ShiftAmt < 0)
1208 return SDValue();
1209
1210 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1211 "Illegal integer vector type");
1212 V = DAG.getBitcast(ShiftVT, V);
1213 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1214 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1215 return DAG.getBitcast(VT, V);
1216}
1217
1218/// Determine whether a range fits a regular pattern of values.
1219/// This function accounts for the possibility of jumping over the End iterator.
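/// For example (added for clarity), fitsRegularPattern<int>(Begin, 2, End, 0, 2)
/// checks that the elements at positions 0, 2, 4, ... are 0, 2, 4, ...
/// (allowing undefs), which is how the VPACKEV lowering below recognises the
/// <0, 2, 4, ...> pattern.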
1220template <typename ValType>
1221static bool
1222fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1223 unsigned CheckStride,
1224 typename SmallVectorImpl<ValType>::const_iterator End,
1225 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1226 auto &I = Begin;
1227
1228 while (I != End) {
1229 if (*I != -1 && *I != ExpectedIndex)
1230 return false;
1231 ExpectedIndex += ExpectedIndexStride;
1232
1233 // Incrementing past End is undefined behaviour so we must increment one
1234 // step at a time and check for End at each step.
1235 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1236 ; // Empty loop body.
1237 }
1238 return true;
1239}
1240
1241/// Compute whether each element of a shuffle is zeroable.
1242///
1243/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1244static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1245 SDValue V2, APInt &KnownUndef,
1246 APInt &KnownZero) {
1247 int Size = Mask.size();
1248 KnownUndef = KnownZero = APInt::getZero(Size);
1249
1250 V1 = peekThroughBitcasts(V1);
1251 V2 = peekThroughBitcasts(V2);
1252
1253 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1254 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1255
1256 int VectorSizeInBits = V1.getValueSizeInBits();
1257 int ScalarSizeInBits = VectorSizeInBits / Size;
1258 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1259 (void)ScalarSizeInBits;
1260
1261 for (int i = 0; i < Size; ++i) {
1262 int M = Mask[i];
1263 if (M < 0) {
1264 KnownUndef.setBit(i);
1265 continue;
1266 }
1267 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1268 KnownZero.setBit(i);
1269 continue;
1270 }
1271 }
1272}
1273
1274/// Test whether a shuffle mask is equivalent within each sub-lane.
1275///
1276/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1277/// non-trivial to compute in the face of undef lanes. The representation is
1278/// suitable for use with existing 128-bit shuffles as entries from the second
1279/// vector have been remapped to [LaneSize, 2*LaneSize).
1280static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1281 ArrayRef<int> Mask,
1282 SmallVectorImpl<int> &RepeatedMask) {
1283 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1284 RepeatedMask.assign(LaneSize, -1);
1285 int Size = Mask.size();
1286 for (int i = 0; i < Size; ++i) {
1287 assert(Mask[i] == -1 || Mask[i] >= 0);
1288 if (Mask[i] < 0)
1289 continue;
1290 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1291 // This entry crosses lanes, so there is no way to model this shuffle.
1292 return false;
1293
1294 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1295 // Adjust second vector indices to start at LaneSize instead of Size.
1296 int LocalM =
1297 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1298 if (RepeatedMask[i % LaneSize] < 0)
1299 // This is the first non-undef entry in this slot of a 128-bit lane.
1300 RepeatedMask[i % LaneSize] = LocalM;
1301 else if (RepeatedMask[i % LaneSize] != LocalM)
1302 // Found a mismatch with the repeated mask.
1303 return false;
1304 }
1305 return true;
1306}
1307
1308/// Attempts to match vector shuffle as byte rotation.
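/// For example (added for clarity), the v2i64 mask <3, 0> used in the example
/// below yields a rotation of one 64-bit element, i.e. a byte amount of 8,
/// which the caller materialises as the VBSRL/VBSLL/VOR sequence shown there.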
1309static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1310 ArrayRef<int> Mask) {
1311
1312 SDValue Lo, Hi;
1313 SmallVector<int, 16> RepeatedMask;
1314
1315 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1316 return -1;
1317
1318 int NumElts = RepeatedMask.size();
1319 int Rotation = 0;
1320 int Scale = 16 / NumElts;
1321
1322 for (int i = 0; i < NumElts; ++i) {
1323 int M = RepeatedMask[i];
1324 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1325 "Unexpected mask index.");
1326 if (M < 0)
1327 continue;
1328
1329 // Determine where a rotated vector would have started.
1330 int StartIdx = i - (M % NumElts);
1331 if (StartIdx == 0)
1332 return -1;
1333
1334 // If we found the tail of a vector the rotation must be the missing
1335 // front. If we found the head of a vector, it must be how much of the
1336 // head.
1337 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1338
1339 if (Rotation == 0)
1340 Rotation = CandidateRotation;
1341 else if (Rotation != CandidateRotation)
1342 return -1;
1343
1344 // Compute which value this mask is pointing at.
1345 SDValue MaskV = M < NumElts ? V1 : V2;
1346
1347 // Compute which of the two target values this index should be assigned
1348 // to. This reflects whether the high elements are remaining or the low
1349 // elements are remaining.
1350 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1351
1352 // Either set up this value if we've not encountered it before, or check
1353 // that it remains consistent.
1354 if (!TargetV)
1355 TargetV = MaskV;
1356 else if (TargetV != MaskV)
1357 return -1;
1358 }
1359
1360 // Check that we successfully analyzed the mask, and normalize the results.
1361 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1362 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1363 if (!Lo)
1364 Lo = Hi;
1365 else if (!Hi)
1366 Hi = Lo;
1367
1368 V1 = Lo;
1369 V2 = Hi;
1370
1371 return Rotation * Scale;
1372}
1373
1374/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1375///
1376/// For example:
1377/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1378/// <2 x i32> <i32 3, i32 0>
1379/// is lowered to:
1380/// (VBSRL_V $v1, $v1, 8)
1381/// (VBSLL_V $v0, $v0, 8)
1382/// (VOR_V $v0, $V0, $v1)
1383static SDValue
1384lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1385 SDValue V1, SDValue V2, SelectionDAG &DAG,
1386 const LoongArchSubtarget &Subtarget) {
1387
1388 SDValue Lo = V1, Hi = V2;
1389 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1390 if (ByteRotation <= 0)
1391 return SDValue();
1392
1393 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1394 Lo = DAG.getBitcast(ByteVT, Lo);
1395 Hi = DAG.getBitcast(ByteVT, Hi);
1396
1397 int LoByteShift = 16 - ByteRotation;
1398 int HiByteShift = ByteRotation;
1399 MVT GRLenVT = Subtarget.getGRLenVT();
1400
1401 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1402 DAG.getConstant(LoByteShift, DL, GRLenVT));
1403 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1404 DAG.getConstant(HiByteShift, DL, GRLenVT));
1405 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1406}
1407
1408/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1409///
1410/// For example:
1411/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1412/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1413/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1414/// is lowered to:
1415/// (VREPLI $v1, 0)
1416/// (VILVL $v0, $v1, $v0)
1417static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
1418 ArrayRef<int> Mask, MVT VT,
1419 SDValue V1, SDValue V2,
1420 SelectionDAG &DAG,
1421 const APInt &Zeroable) {
1422 int Bits = VT.getSizeInBits();
1423 int EltBits = VT.getScalarSizeInBits();
1424 int NumElements = VT.getVectorNumElements();
1425
1426 if (Zeroable.isAllOnes())
1427 return DAG.getConstant(0, DL, VT);
1428
1429 // Define a helper function to check a particular ext-scale and lower to it if
1430 // valid.
1431 auto Lower = [&](int Scale) -> SDValue {
1432 SDValue InputV;
1433 bool AnyExt = true;
1434 int Offset = 0;
1435 for (int i = 0; i < NumElements; i++) {
1436 int M = Mask[i];
1437 if (M < 0)
1438 continue;
1439 if (i % Scale != 0) {
1440 // Each of the extended elements need to be zeroable.
1441 if (!Zeroable[i])
1442 return SDValue();
1443
1444 AnyExt = false;
1445 continue;
1446 }
1447
1448 // Each of the base elements needs to be consecutive indices into the
1449 // same input vector.
1450 SDValue V = M < NumElements ? V1 : V2;
1451 M = M % NumElements;
1452 if (!InputV) {
1453 InputV = V;
1454 Offset = M - (i / Scale);
1455
1456 // These offsets can't be handled.
1457 if (Offset % (NumElements / Scale))
1458 return SDValue();
1459 } else if (InputV != V)
1460 return SDValue();
1461
1462 if (M != (Offset + (i / Scale)))
1463 return SDValue(); // Non-consecutive strided elements.
1464 }
1465
1466 // If we fail to find an input, we have a zero-shuffle which should always
1467 // have already been handled.
1468 if (!InputV)
1469 return SDValue();
1470
1471 do {
1472 unsigned VilVLoHi = LoongArchISD::VILVL;
1473 if (Offset >= (NumElements / 2)) {
1474 VilVLoHi = LoongArchISD::VILVH;
1475 Offset -= (NumElements / 2);
1476 }
1477
1478 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1479 SDValue Ext =
1480 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1481 InputV = DAG.getBitcast(InputVT, InputV);
1482 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1483 Scale /= 2;
1484 EltBits *= 2;
1485 NumElements /= 2;
1486 } while (Scale > 1);
1487 return DAG.getBitcast(VT, InputV);
1488 };
1489
1490 // Each iteration, try extending the elements half as much, but into twice as
1491 // many elements.
1492 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1493 NumExtElements *= 2) {
1494 if (SDValue V = Lower(NumElements / NumExtElements))
1495 return V;
1496 }
1497 return SDValue();
1498}
1499
1500/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1501///
1502/// VREPLVEI performs vector broadcast based on an element specified by an
1503/// integer immediate, with its mask being similar to:
1504/// <x, x, x, ...>
1505/// where x is any valid index.
1506///
1507/// When undef's appear in the mask they are treated as if they were whatever
1508/// value is necessary in order to fit the above form.
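/// For example (added for clarity), the v4i32 mask <1, 1, 1, 1> (or the same
/// mask with some entries undef) broadcasts element 1 of the first vector and
/// is selected as a VREPLVEI with immediate 1.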
1509static SDValue
1510lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1511 SDValue V1, SDValue V2, SelectionDAG &DAG,
1512 const LoongArchSubtarget &Subtarget) {
1513 int SplatIndex = -1;
1514 for (const auto &M : Mask) {
1515 if (M != -1) {
1516 SplatIndex = M;
1517 break;
1518 }
1519 }
1520
1521 if (SplatIndex == -1)
1522 return DAG.getUNDEF(VT);
1523
1524 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1525 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1526 APInt Imm(64, SplatIndex);
1527 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1528 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1529 }
1530
1531 return SDValue();
1532}
1533
1534/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1535///
1536/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1537/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1538///
1539/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1540/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1541/// When undef's appear they are treated as if they were whatever value is
1542/// necessary in order to fit the above forms.
1543///
1544/// For example:
1545/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1546/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1547/// i32 7, i32 6, i32 5, i32 4>
1548/// is lowered to:
1549/// (VSHUF4I_H $v0, $v1, 27)
1550/// where the 27 comes from:
1551/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1552static SDValue
1553lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1554 SDValue V1, SDValue V2, SelectionDAG &DAG,
1555 const LoongArchSubtarget &Subtarget) {
1556
1557 unsigned SubVecSize = 4;
1558 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1559 SubVecSize = 2;
1560
1561 int SubMask[4] = {-1, -1, -1, -1};
1562 for (unsigned i = 0; i < SubVecSize; ++i) {
1563 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1564 int M = Mask[j];
1565
1566 // Convert from vector index to 4-element subvector index
1567 // If an index refers to an element outside of the subvector then give up
1568 if (M != -1) {
1569 M -= 4 * (j / SubVecSize);
1570 if (M < 0 || M >= 4)
1571 return SDValue();
1572 }
1573
1574 // If the mask has an undef, replace it with the current index.
1575 // Note that it might still be undef if the current index is also undef
1576 if (SubMask[i] == -1)
1577 SubMask[i] = M;
1578 // Check that non-undef values are the same as in the mask. If they
1579 // aren't then give up
1580 else if (M != -1 && M != SubMask[i])
1581 return SDValue();
1582 }
1583 }
1584
1585 // Calculate the immediate. Replace any remaining undefs with zero
1586 APInt Imm(64, 0);
1587 for (int i = SubVecSize - 1; i >= 0; --i) {
1588 int M = SubMask[i];
1589
1590 if (M == -1)
1591 M = 0;
1592
1593 Imm <<= 2;
1594 Imm |= M & 0x3;
1595 }
1596
1597 MVT GRLenVT = Subtarget.getGRLenVT();
1598
1599 // Return vshuf4i.d
1600 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1601 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1602 DAG.getConstant(Imm, DL, GRLenVT));
1603
1604 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1605 DAG.getConstant(Imm, DL, GRLenVT));
1606}
1607
1608/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1609///
1610/// VPACKEV interleaves the even elements from each vector.
1611///
1612/// It is possible to lower into VPACKEV when the mask consists of two of the
1613/// following forms interleaved:
1614/// <0, 2, 4, ...>
1615/// <n, n+2, n+4, ...>
1616/// where n is the number of elements in the vector.
1617/// For example:
1618/// <0, 0, 2, 2, 4, 4, ...>
1619/// <0, n, 2, n+2, 4, n+4, ...>
1620///
1621/// When undef's appear in the mask they are treated as if they were whatever
1622/// value is necessary in order to fit the above forms.
1623static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1624 MVT VT, SDValue V1, SDValue V2,
1625 SelectionDAG &DAG) {
1626
1627 const auto &Begin = Mask.begin();
1628 const auto &End = Mask.end();
1629 SDValue OriV1 = V1, OriV2 = V2;
1630
1631 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1632 V1 = OriV1;
1633 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1634 V1 = OriV2;
1635 else
1636 return SDValue();
1637
1638 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1639 V2 = OriV1;
1640 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1641 V2 = OriV2;
1642 else
1643 return SDValue();
1644
1645 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1646}
1647
1648/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1649///
1650/// VPACKOD interleaves the odd elements from each vector.
1651///
1652/// It is possible to lower into VPACKOD when the mask consists of two of the
1653/// following forms interleaved:
1654/// <1, 3, 5, ...>
1655/// <n+1, n+3, n+5, ...>
1656/// where n is the number of elements in the vector.
1657/// For example:
1658/// <1, 1, 3, 3, 5, 5, ...>
1659/// <1, n+1, 3, n+3, 5, n+5, ...>
1660///
1661/// When undef's appear in the mask they are treated as if they were whatever
1662/// value is necessary in order to fit the above forms.
1663static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1664 MVT VT, SDValue V1, SDValue V2,
1665 SelectionDAG &DAG) {
1666
1667 const auto &Begin = Mask.begin();
1668 const auto &End = Mask.end();
1669 SDValue OriV1 = V1, OriV2 = V2;
1670
1671 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1672 V1 = OriV1;
1673 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1674 V1 = OriV2;
1675 else
1676 return SDValue();
1677
1678 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1679 V2 = OriV1;
1680 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1681 V2 = OriV2;
1682 else
1683 return SDValue();
1684
1685 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1686}
1687
1688/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1689///
1690/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1691/// of each vector.
1692///
1693/// It is possible to lower into VILVH when the mask consists of two of the
1694/// following forms interleaved:
1695/// <x, x+1, x+2, ...>
1696/// <n+x, n+x+1, n+x+2, ...>
1697/// where n is the number of elements in the vector and x is half n.
1698/// For example:
1699/// <x, x, x+1, x+1, x+2, x+2, ...>
1700/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1701///
1702/// When undef's appear in the mask they are treated as if they were whatever
1703/// value is necessary in order to fit the above forms.
1704static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
1705 MVT VT, SDValue V1, SDValue V2,
1706 SelectionDAG &DAG) {
1707
1708 const auto &Begin = Mask.begin();
1709 const auto &End = Mask.end();
1710 unsigned HalfSize = Mask.size() / 2;
1711 SDValue OriV1 = V1, OriV2 = V2;
1712
1713 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1714 V1 = OriV1;
1715 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1716 V1 = OriV2;
1717 else
1718 return SDValue();
1719
1720 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1721 V2 = OriV1;
1722 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1723 1))
1724 V2 = OriV2;
1725 else
1726 return SDValue();
1727
1728 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1729}
1730
1731/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1732///
1733/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1734/// of each vector.
1735///
1736/// It is possible to lower into VILVL when the mask consists of two of the
1737/// following forms interleaved:
1738/// <0, 1, 2, ...>
1739/// <n, n+1, n+2, ...>
1740/// where n is the number of elements in the vector.
1741/// For example:
1742/// <0, 0, 1, 1, 2, 2, ...>
1743/// <0, n, 1, n+1, 2, n+2, ...>
1744///
1745/// When undef's appear in the mask they are treated as if they were whatever
1746/// value is necessary in order to fit the above forms.
1747static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
1748 MVT VT, SDValue V1, SDValue V2,
1749 SelectionDAG &DAG) {
1750
1751 const auto &Begin = Mask.begin();
1752 const auto &End = Mask.end();
1753 SDValue OriV1 = V1, OriV2 = V2;
1754
1755 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1756 V1 = OriV1;
1757 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1758 V1 = OriV2;
1759 else
1760 return SDValue();
1761
1762 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1763 V2 = OriV1;
1764 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1765 V2 = OriV2;
1766 else
1767 return SDValue();
1768
1769 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1770}
1771
1772/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1773///
1774/// VPICKEV copies the even elements of each vector into the result vector.
1775///
1776/// It is possible to lower into VPICKEV when the mask consists of two of the
1777/// following forms concatenated:
1778/// <0, 2, 4, ...>
1779/// <n, n+2, n+4, ...>
1780/// where n is the number of elements in the vector.
1781/// For example:
1782/// <0, 2, 4, ..., 0, 2, 4, ...>
1783/// <0, 2, 4, ..., n, n+2, n+4, ...>
1784///
1785/// When undef's appear in the mask they are treated as if they were whatever
1786/// value is necessary in order to fit the above forms.
1787static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1788 MVT VT, SDValue V1, SDValue V2,
1789 SelectionDAG &DAG) {
1790
1791 const auto &Begin = Mask.begin();
1792 const auto &Mid = Mask.begin() + Mask.size() / 2;
1793 const auto &End = Mask.end();
1794 SDValue OriV1 = V1, OriV2 = V2;
1795
1796 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1797 V1 = OriV1;
1798 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1799 V1 = OriV2;
1800 else
1801 return SDValue();
1802
1803 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1804 V2 = OriV1;
1805 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1806 V2 = OriV2;
1807
1808 else
1809 return SDValue();
1810
1811 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1812}
1813
1814/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1815///
1816/// VPICKOD copies the odd elements of each vector into the result vector.
1817///
1818/// It is possible to lower into VPICKOD when the mask consists of two of the
1819/// following forms concatenated:
1820/// <1, 3, 5, ...>
1821/// <n+1, n+3, n+5, ...>
1822/// where n is the number of elements in the vector.
1823/// For example:
1824/// <1, 3, 5, ..., 1, 3, 5, ...>
1825/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1826///
1827/// When undef's appear in the mask they are treated as if they were whatever
1828/// value is necessary in order to fit the above forms.
1829static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1830 MVT VT, SDValue V1, SDValue V2,
1831 SelectionDAG &DAG) {
1832
1833 const auto &Begin = Mask.begin();
1834 const auto &Mid = Mask.begin() + Mask.size() / 2;
1835 const auto &End = Mask.end();
1836 SDValue OriV1 = V1, OriV2 = V2;
1837
1838 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1839 V1 = OriV1;
1840 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1841 V1 = OriV2;
1842 else
1843 return SDValue();
1844
1845 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1846 V2 = OriV1;
1847 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1848 V2 = OriV2;
1849 else
1850 return SDValue();
1851
1852 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1853}
1854
1855/// Lower VECTOR_SHUFFLE into VSHUF.
1856///
1857/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1858/// adding it as an operand to the resulting VSHUF.
1859static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1860 MVT VT, SDValue V1, SDValue V2,
1861 SelectionDAG &DAG) {
1862
1863 SmallVector<SDValue, 16> Ops;
1864 for (auto M : Mask)
1865 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1866
1867 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1868 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1869
1870 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1871 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1872 // VSHF concatenates the vectors in a bitwise fashion:
1873 // <0b00, 0b01> + <0b10, 0b11> ->
1874 // 0b0100 + 0b1110 -> 0b01001110
1875 // <0b10, 0b11, 0b00, 0b01>
1876 // We must therefore swap the operands to get the correct result.
1877 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1878}
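// Illustrative sketch (hypothetical v4i32 shuffle, not from the source): a
// mask <0, 4, 1, 5> is materialized as the integer build_vector <0, 4, 1, 5>,
// and because VSHUF and VECTOR_SHUFFLE concatenate their operands in opposite
// orders, the node is emitted as (VSHUF <0, 4, 1, 5>, V2, V1) rather than
// (VSHUF <0, 4, 1, 5>, V1, V2).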
1879
1880/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1881///
1882/// This routine breaks down the specific type of 128-bit shuffle and
1883/// dispatches to the lowering routines accordingly.
1884 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1885 SDValue V1, SDValue V2, SelectionDAG &DAG,
1886 const LoongArchSubtarget &Subtarget) {
1887 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1888 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1889 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1890 "Vector type is unsupported for lsx!");
1892 "Two operands have different types!");
1893 assert(VT.getVectorNumElements() == Mask.size() &&
1894 "Unexpected mask size for shuffle!");
1895 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1896
1897 APInt KnownUndef, KnownZero;
1898 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1899 APInt Zeroable = KnownUndef | KnownZero;
1900
1901 SDValue Result;
1902 // TODO: Add more comparison patterns.
1903 if (V2.isUndef()) {
1904 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
1905 Subtarget)))
1906 return Result;
1907 if ((Result =
1908 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1909 return Result;
1910
1911 // TODO: The commented-out assignment below may be enabled in the future to
1912 // better match the pattern for instruction selection.
1913 /* V2 = V1; */
1914 }
1915
1916 // It is recommended not to change the pattern comparison order for better
1917 // performance.
1918 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1919 return Result;
1920 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1921 return Result;
1922 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1923 return Result;
1924 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1925 return Result;
1926 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1927 return Result;
1928 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1929 return Result;
1930 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1931 (Result =
1932 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1933 return Result;
1934 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1935 Zeroable)))
1936 return Result;
1937 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
1938 Zeroable)))
1939 return Result;
1940 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
1941 Subtarget)))
1942 return Result;
1943 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1944 return NewShuffle;
1945 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1946 return Result;
1947 return SDValue();
1948}
1949
1950/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1951///
1952 /// It is an XVREPLVEI when the mask is:
1953/// <x, x, x, ..., x+n, x+n, x+n, ...>
1954/// where the number of x is equal to n and n is half the length of vector.
1955///
1956/// When undef's appear in the mask they are treated as if they were whatever
1957/// value is necessary in order to fit the above form.
1958static SDValue
1959 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1960 SDValue V1, SDValue V2, SelectionDAG &DAG,
1961 const LoongArchSubtarget &Subtarget) {
1962 int SplatIndex = -1;
1963 for (const auto &M : Mask) {
1964 if (M != -1) {
1965 SplatIndex = M;
1966 break;
1967 }
1968 }
1969
1970 if (SplatIndex == -1)
1971 return DAG.getUNDEF(VT);
1972
1973 const auto &Begin = Mask.begin();
1974 const auto &End = Mask.end();
1975 unsigned HalfSize = Mask.size() / 2;
1976
1977 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1978 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1979 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1980 0)) {
1981 APInt Imm(64, SplatIndex);
1982 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1983 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1984 }
1985
1986 return SDValue();
1987}
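// Illustrative trace (hypothetical v8i32 mask, not from the source): for
// <2, 2, 2, 2, 6, 6, 6, 6>, SplatIndex is 2 and HalfSize is 4. The first half
// repeats 2 and the second half repeats 2 + HalfSize = 6, so both
// fitsRegularPattern checks (stride 0) succeed and the node becomes
// (VREPLVEI V1, 2), replicating element 2 within each 128-bit lane.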
1988
1989/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1990static SDValue
1991 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1992 SDValue V1, SDValue V2, SelectionDAG &DAG,
1993 const LoongArchSubtarget &Subtarget) {
1994 // When the size is less than or equal to 4, lower-cost instructions may be
1995 // used.
1996 if (Mask.size() <= 4)
1997 return SDValue();
1998 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
1999}
2000
2001/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2002 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2003 MVT VT, SDValue V1, SDValue V2,
2004 SelectionDAG &DAG) {
2005 // LoongArch LASX only has XVPERM_W.
2006 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2007 return SDValue();
2008
2009 unsigned NumElts = VT.getVectorNumElements();
2010 unsigned HalfSize = NumElts / 2;
2011 bool FrontLo = true, FrontHi = true;
2012 bool BackLo = true, BackHi = true;
2013
2014 auto inRange = [](int val, int low, int high) {
2015 return (val == -1) || (val >= low && val < high);
2016 };
2017
2018 for (unsigned i = 0; i < HalfSize; ++i) {
2019 int Fronti = Mask[i];
2020 int Backi = Mask[i + HalfSize];
2021
2022 FrontLo &= inRange(Fronti, 0, HalfSize);
2023 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2024 BackLo &= inRange(Backi, 0, HalfSize);
2025 BackHi &= inRange(Backi, HalfSize, NumElts);
2026 }
2027
2028 // If both the lower and upper 128-bit parts access only one half of the
2029 // vector (either lower or upper), avoid using xvperm.w. The latency of
2030 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2031 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2032 return SDValue();
2033
2034 SmallVector<SDValue, 8> Masks;
2035 for (unsigned i = 0; i < NumElts; ++i)
2036 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
2037 : DAG.getConstant(Mask[i], DL, MVT::i64));
2038 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2039
2040 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2041}
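// Illustrative examples of the half-lane check above (hypothetical v8i32
// masks, not from the source): for <7, 5, 6, 4, 3, 1, 2, 0> the front half
// reads only elements [4, 8) and the back half only [0, 4), so both
// (FrontLo || FrontHi) and (BackLo || BackHi) hold and XVPERM is skipped in
// favour of cheaper lowerings. For <0, 4, 1, 5, 2, 6, 3, 7> each half mixes
// both source halves, so the mask is built into a v8i32 vector and emitted as
// (XVPERM V1, MaskVec).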
2042
2043/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2044 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2045 MVT VT, SDValue V1, SDValue V2,
2046 SelectionDAG &DAG) {
2047 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2048}
2049
2050/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2051 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2052 MVT VT, SDValue V1, SDValue V2,
2053 SelectionDAG &DAG) {
2054 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2055}
2056
2057/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2058 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2059 MVT VT, SDValue V1, SDValue V2,
2060 SelectionDAG &DAG) {
2061
2062 const auto &Begin = Mask.begin();
2063 const auto &End = Mask.end();
2064 unsigned HalfSize = Mask.size() / 2;
2065 unsigned LeftSize = HalfSize / 2;
2066 SDValue OriV1 = V1, OriV2 = V2;
2067
2068 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2069 1) &&
2070 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2071 V1 = OriV1;
2072 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2073 Mask.size() + HalfSize - LeftSize, 1) &&
2074 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2075 Mask.size() + HalfSize + LeftSize, 1))
2076 V1 = OriV2;
2077 else
2078 return SDValue();
2079
2080 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2081 1) &&
2082 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2083 1))
2084 V2 = OriV1;
2085 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2086 Mask.size() + HalfSize - LeftSize, 1) &&
2087 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2088 Mask.size() + HalfSize + LeftSize, 1))
2089 V2 = OriV2;
2090 else
2091 return SDValue();
2092
2093 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2094}
2095
2096/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2097 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2098 MVT VT, SDValue V1, SDValue V2,
2099 SelectionDAG &DAG) {
2100
2101 const auto &Begin = Mask.begin();
2102 const auto &End = Mask.end();
2103 unsigned HalfSize = Mask.size() / 2;
2104 SDValue OriV1 = V1, OriV2 = V2;
2105
2106 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2107 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2108 V1 = OriV1;
2109 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2110 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2111 Mask.size() + HalfSize, 1))
2112 V1 = OriV2;
2113 else
2114 return SDValue();
2115
2116 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2117 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2118 V2 = OriV1;
2119 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2120 1) &&
2121 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2122 Mask.size() + HalfSize, 1))
2123 V2 = OriV2;
2124 else
2125 return SDValue();
2126
2127 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2128}
2129
2130/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2131 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2132 MVT VT, SDValue V1, SDValue V2,
2133 SelectionDAG &DAG) {
2134
2135 const auto &Begin = Mask.begin();
2136 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2137 const auto &Mid = Mask.begin() + Mask.size() / 2;
2138 const auto &RightMid = Mask.end() - Mask.size() / 4;
2139 const auto &End = Mask.end();
2140 unsigned HalfSize = Mask.size() / 2;
2141 SDValue OriV1 = V1, OriV2 = V2;
2142
2143 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2144 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2145 V1 = OriV1;
2146 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2147 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2148 V1 = OriV2;
2149 else
2150 return SDValue();
2151
2152 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2153 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2154 V2 = OriV1;
2155 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2156 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2157 V2 = OriV2;
2158
2159 else
2160 return SDValue();
2161
2162 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2163}
2164
2165/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2166 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2167 MVT VT, SDValue V1, SDValue V2,
2168 SelectionDAG &DAG) {
2169
2170 const auto &Begin = Mask.begin();
2171 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2172 const auto &Mid = Mask.begin() + Mask.size() / 2;
2173 const auto &RightMid = Mask.end() - Mask.size() / 4;
2174 const auto &End = Mask.end();
2175 unsigned HalfSize = Mask.size() / 2;
2176 SDValue OriV1 = V1, OriV2 = V2;
2177
2178 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2179 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2180 V1 = OriV1;
2181 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2182 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2183 2))
2184 V1 = OriV2;
2185 else
2186 return SDValue();
2187
2188 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2189 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2190 V2 = OriV1;
2191 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2192 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2193 2))
2194 V2 = OriV2;
2195 else
2196 return SDValue();
2197
2198 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2199}
2200
2201/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2202 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2203 MVT VT, SDValue V1, SDValue V2,
2204 SelectionDAG &DAG) {
2205
2206 int MaskSize = Mask.size();
2207 int HalfSize = Mask.size() / 2;
2208 const auto &Begin = Mask.begin();
2209 const auto &Mid = Mask.begin() + HalfSize;
2210 const auto &End = Mask.end();
2211
2212 // VECTOR_SHUFFLE concatenates the vectors:
2213 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2214 // shuffling ->
2215 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2216 //
2217 // XVSHUF concatenates the vectors:
2218 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2219 // shuffling ->
2220 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2221 SmallVector<SDValue, 8> MaskAlloc;
2222 for (auto it = Begin; it < Mid; it++) {
2223 if (*it < 0) // UNDEF
2224 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2225 else if ((*it >= 0 && *it < HalfSize) ||
2226 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2227 int M = *it < HalfSize ? *it : *it - HalfSize;
2228 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2229 } else
2230 return SDValue();
2231 }
2232 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2233
2234 for (auto it = Mid; it < End; it++) {
2235 if (*it < 0) // UNDEF
2236 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2237 else if ((*it >= HalfSize && *it < MaskSize) ||
2238 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2239 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2240 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2241 } else
2242 return SDValue();
2243 }
2244 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2245
2246 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2247 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2248 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2249}
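// Illustrative trace of the re-encoding above (hypothetical v8i32 mask, not
// from the source): for <0, 8, 1, 9, 4, 12, 5, 13>, MaskSize is 8 and HalfSize
// is 4. The first half only references the low 128-bit halves of the operands
// and is rewritten to <0, 4, 1, 5>; the second half only references the high
// 128-bit halves and is also rewritten to <0, 4, 1, 5>. The final mask
// <0, 4, 1, 5, 0, 4, 1, 5> is then used for (VSHUF Mask, V2, V1).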
2250
2251/// Shuffle vectors by lane to generate more optimized instructions.
2252/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2253///
2254/// Therefore, except for the following four cases, other cases are regarded
2255/// as cross-lane shuffles, where optimization is relatively limited.
2256///
2257 /// - Shuffle high, low lanes of two input vectors
2258 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2259 /// - Shuffle low, high lanes of two input vectors
2260 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2261 /// - Shuffle low, low lanes of two input vectors
2262 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2263 /// - Shuffle high, high lanes of two input vectors
2264 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2265///
2266/// The first case is the closest to LoongArch instructions and the other
2267/// cases need to be converted to it for processing.
2268///
2269 /// This function may modify V1, V2 and Mask.
2270 static void canonicalizeShuffleVectorByLane(
2271 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2272 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2273
2274 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2275
2276 int MaskSize = Mask.size();
2277 int HalfSize = Mask.size() / 2;
2278 MVT GRLenVT = Subtarget.getGRLenVT();
2279
2280 HalfMaskType preMask = None, postMask = None;
2281
2282 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2283 return M < 0 || (M >= 0 && M < HalfSize) ||
2284 (M >= MaskSize && M < MaskSize + HalfSize);
2285 }))
2286 preMask = HighLaneTy;
2287 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2288 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2289 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2290 }))
2291 preMask = LowLaneTy;
2292
2293 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2294 return M < 0 || (M >= 0 && M < HalfSize) ||
2295 (M >= MaskSize && M < MaskSize + HalfSize);
2296 }))
2297 postMask = HighLaneTy;
2298 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2299 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2300 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2301 }))
2302 postMask = LowLaneTy;
2303
2304 // The pre-half of mask is high lane type, and the post-half of mask
2305 // is low lane type, which is closest to the LoongArch instructions.
2306 //
2307 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2308 // to the lower 128 bits of the vector register, and the low lane of the mask
2309 // corresponds to the higher 128 bits of the vector register.
2310 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2311 return;
2312 }
2313 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2314 V1 = DAG.getBitcast(MVT::v4i64, V1);
2315 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2316 DAG.getConstant(0b01001110, DL, GRLenVT));
2317 V1 = DAG.getBitcast(VT, V1);
2318
2319 if (!V2.isUndef()) {
2320 V2 = DAG.getBitcast(MVT::v4i64, V2);
2321 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2322 DAG.getConstant(0b01001110, DL, GRLenVT));
2323 V2 = DAG.getBitcast(VT, V2);
2324 }
2325
2326 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2327 *it = *it < 0 ? *it : *it - HalfSize;
2328 }
2329 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2330 *it = *it < 0 ? *it : *it + HalfSize;
2331 }
2332 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2333 V1 = DAG.getBitcast(MVT::v4i64, V1);
2334 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2335 DAG.getConstant(0b11101110, DL, GRLenVT));
2336 V1 = DAG.getBitcast(VT, V1);
2337
2338 if (!V2.isUndef()) {
2339 V2 = DAG.getBitcast(MVT::v4i64, V2);
2340 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2341 DAG.getConstant(0b11101110, DL, GRLenVT));
2342 V2 = DAG.getBitcast(VT, V2);
2343 }
2344
2345 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2346 *it = *it < 0 ? *it : *it - HalfSize;
2347 }
2348 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2349 V1 = DAG.getBitcast(MVT::v4i64, V1);
2350 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2351 DAG.getConstant(0b01000100, DL, GRLenVT));
2352 V1 = DAG.getBitcast(VT, V1);
2353
2354 if (!V2.isUndef()) {
2355 V2 = DAG.getBitcast(MVT::v4i64, V2);
2356 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2357 DAG.getConstant(0b01000100, DL, GRLenVT));
2358 V2 = DAG.getBitcast(VT, V2);
2359 }
2360
2361 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2362 *it = *it < 0 ? *it : *it + HalfSize;
2363 }
2364 } else { // cross-lane
2365 return;
2366 }
2367}
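// Illustrative trace (hypothetical v8i32 shuffle, not from the source): assume
// every mask element in the first half selects from the upper 128 bits of a
// source and every element in the second half selects from the lower 128 bits
// (preMask == LowLaneTy, postMask == HighLaneTy). Each defined operand is then
// permuted with (XVPERMI vN, 0b01001110), which as a v4i64 permutation reorders
// the 64-bit elements to <2, 3, 0, 1>, i.e. swaps the two 128-bit halves. The
// mask is adjusted accordingly (first-half indices decremented by HalfSize,
// second-half indices incremented), leaving an equivalent shuffle that now
// matches the preferred high/low lane order.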
2368
2369/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2370/// Only for 256-bit vector.
2371///
2372/// For example:
2373 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2374 /// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2375 /// is lowered to:
2376/// (XVPERMI $xr2, $xr0, 78)
2377/// (XVSHUF $xr1, $xr2, $xr0)
2378/// (XVORI $xr0, $xr1, 0)
2379 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2380 ArrayRef<int> Mask,
2381 MVT VT, SDValue V1,
2382 SDValue V2,
2383 SelectionDAG &DAG) {
2384 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2385 int Size = Mask.size();
2386 int LaneSize = Size / 2;
2387
2388 bool LaneCrossing[2] = {false, false};
2389 for (int i = 0; i < Size; ++i)
2390 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2391 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2392
2393 // If no element crosses lanes, this lowering does not apply.
2394 if (!LaneCrossing[0] && !LaneCrossing[1])
2395 return SDValue();
2396
2397 SmallVector<int> InLaneMask;
2398 InLaneMask.assign(Mask.begin(), Mask.end());
2399 for (int i = 0; i < Size; ++i) {
2400 int &M = InLaneMask[i];
2401 if (M < 0)
2402 continue;
2403 if (((M % Size) / LaneSize) != (i / LaneSize))
2404 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2405 }
2406
2407 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2408 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2409 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2410 Flipped = DAG.getBitcast(VT, Flipped);
2411 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2412}
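// Illustrative trace of the in-lane rewrite above (the v4i64 example from the
// comment): for mask <0, 3, 2, 0>, positions 1 and 3 read an element from the
// other 128-bit lane, so LaneCrossing is set for both lanes. Those entries are
// redirected into the lane-swapped copy: InLaneMask becomes <0, 5, 2, 6>, where
// indices >= 4 select from Flipped (V1 with its lanes exchanged via the
// <2, 3, 0, 1> v4i64 shuffle). The resulting two-operand shuffle no longer
// crosses lanes and is lowered by the ordinary 256-bit routines.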
2413
2414/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2415///
2416/// This routine breaks down the specific type of 256-bit shuffle and
2417/// dispatches to the lowering routines accordingly.
2418 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2419 SDValue V1, SDValue V2, SelectionDAG &DAG,
2420 const LoongArchSubtarget &Subtarget) {
2421 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2422 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2423 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2424 "Vector type is unsupported for lasx!");
2426 "Two operands have different types!");
2427 assert(VT.getVectorNumElements() == Mask.size() &&
2428 "Unexpected mask size for shuffle!");
2429 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2430 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2431
2432 // Canonicalize non-cross-lane shuffle vectors.
2433 SmallVector<int> NewMask(Mask);
2434 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2435
2436 APInt KnownUndef, KnownZero;
2437 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2438 APInt Zeroable = KnownUndef | KnownZero;
2439
2440 SDValue Result;
2441 // TODO: Add more comparison patterns.
2442 if (V2.isUndef()) {
2443 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2444 Subtarget)))
2445 return Result;
2446 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2447 Subtarget)))
2448 return Result;
2449 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2450 return Result;
2451 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2452 V1, V2, DAG)))
2453 return Result;
2454
2455 // TODO: The commented-out assignment below may be enabled in the future to
2456 // better match the pattern for instruction selection.
2457 /* V2 = V1; */
2458 }
2459
2460 // It is recommended not to change the pattern comparison order for better
2461 // performance.
2462 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2463 return Result;
2464 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2465 return Result;
2466 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2467 return Result;
2468 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2469 return Result;
2470 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2471 return Result;
2472 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2473 return Result;
2474 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2475 Subtarget, Zeroable)))
2476 return Result;
2477 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2478 Subtarget)))
2479 return Result;
2480 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2481 return NewShuffle;
2482 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2483 return Result;
2484
2485 return SDValue();
2486}
2487
2488SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2489 SelectionDAG &DAG) const {
2490 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2491 ArrayRef<int> OrigMask = SVOp->getMask();
2492 SDValue V1 = Op.getOperand(0);
2493 SDValue V2 = Op.getOperand(1);
2494 MVT VT = Op.getSimpleValueType();
2495 int NumElements = VT.getVectorNumElements();
2496 SDLoc DL(Op);
2497
2498 bool V1IsUndef = V1.isUndef();
2499 bool V2IsUndef = V2.isUndef();
2500 if (V1IsUndef && V2IsUndef)
2501 return DAG.getUNDEF(VT);
2502
2503 // When we create a shuffle node we put the UNDEF node to second operand,
2504 // but in some cases the first operand may be transformed to UNDEF.
2505 // In this case we should just commute the node.
2506 if (V1IsUndef)
2507 return DAG.getCommutedVectorShuffle(*SVOp);
2508
2509 // Check for non-undef masks pointing at an undef vector and make the masks
2510 // undef as well. This makes it easier to match the shuffle based solely on
2511 // the mask.
2512 if (V2IsUndef &&
2513 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2514 SmallVector<int, 8> NewMask(OrigMask);
2515 for (int &M : NewMask)
2516 if (M >= NumElements)
2517 M = -1;
2518 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2519 }
2520
2521 // Check for illegal shuffle mask element index values.
2522 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2523 (void)MaskUpperLimit;
2524 assert(llvm::all_of(OrigMask,
2525 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2526 "Out of bounds shuffle index");
2527
2528 // For each vector width, delegate to a specialized lowering routine.
2529 if (VT.is128BitVector())
2530 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2531
2532 if (VT.is256BitVector())
2533 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2534
2535 return SDValue();
2536}
2537
2538SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2539 SelectionDAG &DAG) const {
2540 // Custom lower to ensure the libcall return is passed in an FPR on hard
2541 // float ABIs.
2542 SDLoc DL(Op);
2543 MakeLibCallOptions CallOptions;
2544 SDValue Op0 = Op.getOperand(0);
2545 SDValue Chain = SDValue();
2546 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2547 SDValue Res;
2548 std::tie(Res, Chain) =
2549 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2550 if (Subtarget.is64Bit())
2551 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2552 return DAG.getBitcast(MVT::i32, Res);
2553}
2554
2555SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2556 SelectionDAG &DAG) const {
2557 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2558 // float ABIs.
2559 SDLoc DL(Op);
2560 MakeLibCallOptions CallOptions;
2561 SDValue Op0 = Op.getOperand(0);
2562 SDValue Chain = SDValue();
2563 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2564 DL, MVT::f32, Op0)
2565 : DAG.getBitcast(MVT::f32, Op0);
2566 SDValue Res;
2567 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2568 CallOptions, DL, Chain);
2569 return Res;
2570}
2571
2572SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2573 SelectionDAG &DAG) const {
2574 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2575 SDLoc DL(Op);
2576 MakeLibCallOptions CallOptions;
2577 RTLIB::Libcall LC =
2578 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2579 SDValue Res =
2580 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2581 if (Subtarget.is64Bit())
2582 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2583 return DAG.getBitcast(MVT::i32, Res);
2584}
2585
2586SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2587 SelectionDAG &DAG) const {
2588 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2589 MVT VT = Op.getSimpleValueType();
2590 SDLoc DL(Op);
2591 Op = DAG.getNode(
2592 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2593 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2594 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2595 DL, MVT::f32, Op)
2596 : DAG.getBitcast(MVT::f32, Op);
2597 if (VT != MVT::f32)
2598 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2599 return Res;
2600}
2601
2602// Lower BUILD_VECTOR as broadcast load (if possible).
2603// For example:
2604// %a = load i8, ptr %ptr
2605// %b = build_vector %a, %a, %a, %a
2606 // is lowered to:
2607// (VLDREPL_B $a0, 0)
2608 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2609 const SDLoc &DL,
2610 SelectionDAG &DAG) {
2611 MVT VT = BVOp->getSimpleValueType(0);
2612 int NumOps = BVOp->getNumOperands();
2613
2614 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2615 "Unsupported vector type for broadcast.");
2616
2617 SDValue IdentitySrc;
2618 bool IsIdentity = true;
2619
2620 for (int i = 0; i != NumOps; i++) {
2621 SDValue Op = BVOp->getOperand(i);
2622 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2623 IsIdentity = false;
2624 break;
2625 }
2626 IdentitySrc = BVOp->getOperand(0);
2627 }
2628
2629 // Make sure that this load is valid and only has one user.
2630 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2631 return SDValue();
2632
2633 auto *LN = cast<LoadSDNode>(IdentitySrc);
2634 auto ExtType = LN->getExtensionType();
2635
2636 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2637 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2638 SDVTList Tys =
2639 LN->isIndexed()
2640 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2641 : DAG.getVTList(VT, MVT::Other);
2642 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2643 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2644 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2645 return BCast;
2646 }
2647 return SDValue();
2648}
2649
2650SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2651 SelectionDAG &DAG) const {
2652 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2653 EVT ResTy = Op->getValueType(0);
2654 unsigned NumElts = ResTy.getVectorNumElements();
2655 SDLoc DL(Op);
2656 APInt SplatValue, SplatUndef;
2657 unsigned SplatBitSize;
2658 bool HasAnyUndefs;
2659 bool IsConstant = false;
2660 bool UseSameConstant = true;
2661 SDValue ConstantValue;
2662 bool Is128Vec = ResTy.is128BitVector();
2663 bool Is256Vec = ResTy.is256BitVector();
2664
2665 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2666 (!Subtarget.hasExtLASX() || !Is256Vec))
2667 return SDValue();
2668
2669 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2670 return Result;
2671
2672 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2673 /*MinSplatBits=*/8) &&
2674 SplatBitSize <= 64) {
2675 // We can only cope with 8, 16, 32, or 64-bit elements.
2676 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2677 SplatBitSize != 64)
2678 return SDValue();
2679
2680 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2681 // We can only handle 64-bit elements that are within
2682 // the signed 32-bit range on 32-bit targets.
2683 if (!SplatValue.isSignedIntN(32))
2684 return SDValue();
2685 if ((Is128Vec && ResTy == MVT::v4i32) ||
2686 (Is256Vec && ResTy == MVT::v8i32))
2687 return Op;
2688 }
2689
2690 EVT ViaVecTy;
2691
2692 switch (SplatBitSize) {
2693 default:
2694 return SDValue();
2695 case 8:
2696 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2697 break;
2698 case 16:
2699 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2700 break;
2701 case 32:
2702 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2703 break;
2704 case 64:
2705 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2706 break;
2707 }
2708
2709 // SelectionDAG::getConstant will promote SplatValue appropriately.
2710 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2711
2712 // Bitcast to the type we originally wanted.
2713 if (ViaVecTy != ResTy)
2714 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2715
2716 return Result;
2717 }
2718
2719 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2720 return Op;
2721
2722 for (unsigned i = 0; i < NumElts; ++i) {
2723 SDValue Opi = Node->getOperand(i);
2724 if (isIntOrFPConstant(Opi)) {
2725 IsConstant = true;
2726 if (!ConstantValue.getNode())
2727 ConstantValue = Opi;
2728 else if (ConstantValue != Opi)
2729 UseSameConstant = false;
2730 }
2731 }
2732
2733 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2734 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2735 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2736 for (unsigned i = 0; i < NumElts; ++i) {
2737 SDValue Opi = Node->getOperand(i);
2738 if (!isIntOrFPConstant(Opi))
2739 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2740 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2741 }
2742 return Result;
2743 }
2744
2745 if (!IsConstant) {
2746 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2747 // The resulting code is the same length as the expansion, but it doesn't
2748 // use memory operations.
2749 assert(ResTy.isVector());
2750
2751 SDValue Op0 = Node->getOperand(0);
2752 SDValue Vector = DAG.getUNDEF(ResTy);
2753
2754 if (!Op0.isUndef())
2755 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2756 for (unsigned i = 1; i < NumElts; ++i) {
2757 SDValue Opi = Node->getOperand(i);
2758 if (Opi.isUndef())
2759 continue;
2760 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2761 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2762 }
2763 return Vector;
2764 }
2765
2766 return SDValue();
2767}
2768
2769SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2770 SelectionDAG &DAG) const {
2771 SDLoc DL(Op);
2772 MVT ResVT = Op.getSimpleValueType();
2773 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2774
2775 unsigned NumOperands = Op.getNumOperands();
2776 unsigned NumFreezeUndef = 0;
2777 unsigned NumZero = 0;
2778 unsigned NumNonZero = 0;
2779 unsigned NonZeros = 0;
2780 SmallSet<SDValue, 4> Undefs;
2781 for (unsigned i = 0; i != NumOperands; ++i) {
2782 SDValue SubVec = Op.getOperand(i);
2783 if (SubVec.isUndef())
2784 continue;
2785 if (ISD::isFreezeUndef(SubVec.getNode())) {
2786 // If the freeze(undef) has multiple uses then we must fold to zero.
2787 if (SubVec.hasOneUse()) {
2788 ++NumFreezeUndef;
2789 } else {
2790 ++NumZero;
2791 Undefs.insert(SubVec);
2792 }
2793 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2794 ++NumZero;
2795 else {
2796 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2797 NonZeros |= 1 << i;
2798 ++NumNonZero;
2799 }
2800 }
2801
2802 // If we have more than 2 non-zeros, build each half separately.
2803 if (NumNonZero > 2) {
2804 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2805 ArrayRef<SDUse> Ops = Op->ops();
2806 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2807 Ops.slice(0, NumOperands / 2));
2808 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2809 Ops.slice(NumOperands / 2));
2810 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2811 }
2812
2813 // Otherwise, build it up through insert_subvectors.
2814 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2815 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2816 : DAG.getUNDEF(ResVT));
2817
2818 // Replace Undef operands with ZeroVector.
2819 for (SDValue U : Undefs)
2820 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2821
2822 MVT SubVT = Op.getOperand(0).getSimpleValueType();
2823 unsigned NumSubElems = SubVT.getVectorNumElements();
2824 for (unsigned i = 0; i != NumOperands; ++i) {
2825 if ((NonZeros & (1 << i)) == 0)
2826 continue;
2827
2828 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2829 DAG.getVectorIdxConstant(i * NumSubElems, DL));
2830 }
2831
2832 return Vec;
2833}
2834
2835SDValue
2836LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2837 SelectionDAG &DAG) const {
2838 MVT EltVT = Op.getSimpleValueType();
2839 SDValue Vec = Op->getOperand(0);
2840 EVT VecTy = Vec->getValueType(0);
2841 SDValue Idx = Op->getOperand(1);
2842 SDLoc DL(Op);
2843 MVT GRLenVT = Subtarget.getGRLenVT();
2844
2845 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
2846
2847 if (isa<ConstantSDNode>(Idx))
2848 return Op;
2849
2850 switch (VecTy.getSimpleVT().SimpleTy) {
2851 default:
2852 llvm_unreachable("Unexpected type");
2853 case MVT::v32i8:
2854 case MVT::v16i16:
2855 case MVT::v4i64:
2856 case MVT::v4f64: {
2857 // Extract the high half subvector and place it in the low half of a new
2858 // vector. It doesn't matter what the high half of the new vector is.
2859 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
2860 SDValue VecHi =
2861 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
2862 SDValue TmpVec =
2863 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
2864 VecHi, DAG.getConstant(0, DL, GRLenVT));
2865
2866 // Shuffle the original Vec and TmpVec using MaskVec, whose lowest element
2867 // is Idx; the remaining elements do not matter. ResVec[0] will hold the
2868 // desired element.
2869 SDValue IdxCp =
2870 DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx);
2871 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
2872 SDValue MaskVec =
2873 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
2874 SDValue ResVec =
2875 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
2876
2877 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
2878 DAG.getConstant(0, DL, GRLenVT));
2879 }
2880 case MVT::v8i32:
2881 case MVT::v8f32: {
2882 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
2883 SDValue SplatValue =
2884 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
2885
2886 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
2887 DAG.getConstant(0, DL, GRLenVT));
2888 }
2889 }
2890}
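// Illustrative sketch (hypothetical IR, not from the source): extracting a
// variable-index element such as
//   %r = extractelement <8 x i32> %v, i64 %idx
// cannot use a constant-index pattern, so the index is splatted into a v8i32
// vector, (XVPERM %v, SplatIdx) replicates the selected element across the
// result, and extracting element 0 yields it.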
2891
2892SDValue
2893LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2894 SelectionDAG &DAG) const {
2895 MVT VT = Op.getSimpleValueType();
2896 MVT EltVT = VT.getVectorElementType();
2897 unsigned NumElts = VT.getVectorNumElements();
2898 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
2899 SDLoc DL(Op);
2900 SDValue Op0 = Op.getOperand(0);
2901 SDValue Op1 = Op.getOperand(1);
2902 SDValue Op2 = Op.getOperand(2);
2903
2904 if (isa<ConstantSDNode>(Op2))
2905 return Op;
2906
2907 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
2908 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
2909
2910 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
2911 return SDValue();
2912
2913 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
2914 SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
2915
2916 SmallVector<SDValue, 32> RawIndices;
2917 for (unsigned i = 0; i < NumElts; ++i)
2918 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2919 SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
2920
2921 // insert vec, elt, idx
2922 // =>
2923 // select (splatidx == {0,1,2...}) ? splatelt : vec
2924 SDValue SelectCC =
2925 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
2926 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
2927}
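// Illustrative sketch of the select-based expansion above (hypothetical v4i32
// example, not from the source): inserting %e at variable index %idx becomes
//   vselect (splat(%idx) == <0, 1, 2, 3>), splat(%e), %vec
// so only the lane whose position equals %idx receives the new element, and
// every other lane keeps its original value.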
2928
2929SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2930 SelectionDAG &DAG) const {
2931 SDLoc DL(Op);
2932 SyncScope::ID FenceSSID =
2933 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
2934
2935 // singlethread fences only synchronize with signal handlers on the same
2936 // thread and thus only need to preserve instruction order, not actually
2937 // enforce memory ordering.
2938 if (FenceSSID == SyncScope::SingleThread)
2939 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
2940 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
2941
2942 return Op;
2943}
2944
2945SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
2946 SelectionDAG &DAG) const {
2947
2948 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
2949 DAG.getContext()->emitError(
2950 "On LA64, only 64-bit registers can be written.");
2951 return Op.getOperand(0);
2952 }
2953
2954 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
2955 DAG.getContext()->emitError(
2956 "On LA32, only 32-bit registers can be written.");
2957 return Op.getOperand(0);
2958 }
2959
2960 return Op;
2961}
2962
2963SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
2964 SelectionDAG &DAG) const {
2965 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
2966 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
2967 "be a constant integer");
2968 return SDValue();
2969 }
2970
2971 MachineFunction &MF = DAG.getMachineFunction();
2972 MF.getFrameInfo().setFrameAddressIsTaken(true);
2973 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
2974 EVT VT = Op.getValueType();
2975 SDLoc DL(Op);
2976 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2977 unsigned Depth = Op.getConstantOperandVal(0);
2978 int GRLenInBytes = Subtarget.getGRLen() / 8;
2979
2980 while (Depth--) {
2981 int Offset = -(GRLenInBytes * 2);
2982 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2983 DAG.getSignedConstant(Offset, DL, VT));
2984 FrameAddr =
2985 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2986 }
2987 return FrameAddr;
2988}
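// Illustrative note (assuming the standard LoongArch frame layout with the
// saved return address and frame pointer stored immediately below the incoming
// frame pointer): each additional depth level loads the caller's frame pointer
// from FrameAddr - 2 * GRLenInBytes, e.g. __builtin_frame_address(1) on LA64
// performs one load from the current frame address minus 16 bytes.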
2989
2990SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
2991 SelectionDAG &DAG) const {
2992 // Currently, only lowering the return address for the current frame is supported.
2993 if (Op.getConstantOperandVal(0) != 0) {
2994 DAG.getContext()->emitError(
2995 "return address can only be determined for the current frame");
2996 return SDValue();
2997 }
2998
2999 MachineFunction &MF = DAG.getMachineFunction();
3000 MF.getFrameInfo().setReturnAddressIsTaken(true);
3001 MVT GRLenVT = Subtarget.getGRLenVT();
3002
3003 // Return the value of the return address register, marking it an implicit
3004 // live-in.
3005 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3006 getRegClassFor(GRLenVT));
3007 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3008}
3009
3010SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3011 SelectionDAG &DAG) const {
3012 MachineFunction &MF = DAG.getMachineFunction();
3013 auto Size = Subtarget.getGRLen() / 8;
3014 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3015 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3016}
3017
3018SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3019 SelectionDAG &DAG) const {
3020 MachineFunction &MF = DAG.getMachineFunction();
3021 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3022
3023 SDLoc DL(Op);
3024 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3025 getPointerTy(MF.getDataLayout()));
3026
3027 // vastart just stores the address of the VarArgsFrameIndex slot into the
3028 // memory location argument.
3029 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3030 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3031 MachinePointerInfo(SV));
3032}
3033
3034SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3035 SelectionDAG &DAG) const {
3036 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3037 !Subtarget.hasBasicD() && "unexpected target features");
3038
3039 SDLoc DL(Op);
3040 SDValue Op0 = Op.getOperand(0);
3041 if (Op0->getOpcode() == ISD::AND) {
3042 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3043 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3044 return Op;
3045 }
3046
3047 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3048 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3049 Op0.getConstantOperandVal(2) == UINT64_C(0))
3050 return Op;
3051
3052 if (Op0.getOpcode() == ISD::AssertZext &&
3053 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3054 return Op;
3055
3056 EVT OpVT = Op0.getValueType();
3057 EVT RetVT = Op.getValueType();
3058 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3059 MakeLibCallOptions CallOptions;
3060 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3061 SDValue Chain = SDValue();
3062 SDValue Result;
3063 std::tie(Result, Chain) =
3064 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3065 return Result;
3066}
3067
3068SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3069 SelectionDAG &DAG) const {
3070 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3071 !Subtarget.hasBasicD() && "unexpected target features");
3072
3073 SDLoc DL(Op);
3074 SDValue Op0 = Op.getOperand(0);
3075
3076 if ((Op0.getOpcode() == ISD::AssertSext ||
3077 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
3078 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3079 return Op;
3080
3081 EVT OpVT = Op0.getValueType();
3082 EVT RetVT = Op.getValueType();
3083 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3084 MakeLibCallOptions CallOptions;
3085 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3086 SDValue Chain = SDValue();
3087 SDValue Result;
3088 std::tie(Result, Chain) =
3089 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3090 return Result;
3091}
3092
3093SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3094 SelectionDAG &DAG) const {
3095
3096 SDLoc DL(Op);
3097 EVT VT = Op.getValueType();
3098 SDValue Op0 = Op.getOperand(0);
3099 EVT Op0VT = Op0.getValueType();
3100
3101 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3102 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3103 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3104 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3105 }
3106 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3107 SDValue Lo, Hi;
3108 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3109 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3110 }
3111 return Op;
3112}
3113
3114SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3115 SelectionDAG &DAG) const {
3116
3117 SDLoc DL(Op);
3118 SDValue Op0 = Op.getOperand(0);
3119
3120 if (Op0.getValueType() == MVT::f16)
3121 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3122
3123 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3124 !Subtarget.hasBasicD()) {
3125 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3126 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3127 }
3128
3129 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3130 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3131 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3132}
3133
3134 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
3135 SelectionDAG &DAG, unsigned Flags) {
3136 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3137}
3138
3139 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
3140 SelectionDAG &DAG, unsigned Flags) {
3141 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3142 Flags);
3143}
3144
3145 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
3146 SelectionDAG &DAG, unsigned Flags) {
3147 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3148 N->getOffset(), Flags);
3149}
3150
3151 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
3152 SelectionDAG &DAG, unsigned Flags) {
3153 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3154}
3155
3156template <class NodeTy>
3157SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3158 CodeModel::Model M,
3159 bool IsLocal) const {
3160 SDLoc DL(N);
3161 EVT Ty = getPointerTy(DAG.getDataLayout());
3162 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3163 SDValue Load;
3164
3165 switch (M) {
3166 default:
3167 report_fatal_error("Unsupported code model");
3168
3169 case CodeModel::Large: {
3170 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3171
3172 // This is not actually used, but is necessary for successfully matching
3173 // the PseudoLA_*_LARGE nodes.
3174 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3175 if (IsLocal) {
3176 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3177 // eventually becomes the desired 5-insn code sequence.
3178 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3179 Tmp, Addr),
3180 0);
3181 } else {
3182 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3183 // eventually becomes the desired 5-insn code sequence.
3184 Load = SDValue(
3185 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3186 0);
3187 }
3188 break;
3189 }
3190
3191 case CodeModel::Small:
3192 case CodeModel::Medium:
3193 if (IsLocal) {
3194 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3195 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3196 Load = SDValue(
3197 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3198 } else {
3199 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3200 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3201 Load =
3202 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3203 }
3204 }
3205
3206 if (!IsLocal) {
3207 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3208 MachineFunction &MF = DAG.getMachineFunction();
3209 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3210 MachinePointerInfo::getGOT(MF),
3211 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3212 MachineMemOperand::MOInvariant,
3213 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3214 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3215 }
3216
3217 return Load;
3218}
3219
3220SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3221 SelectionDAG &DAG) const {
3222 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3223 DAG.getTarget().getCodeModel());
3224}
3225
3226SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3227 SelectionDAG &DAG) const {
3228 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3229 DAG.getTarget().getCodeModel());
3230}
3231
3232SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3233 SelectionDAG &DAG) const {
3234 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3235 DAG.getTarget().getCodeModel());
3236}
3237
3238SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3239 SelectionDAG &DAG) const {
3240 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3241 assert(N->getOffset() == 0 && "unexpected offset in global node");
3242 auto CM = DAG.getTarget().getCodeModel();
3243 const GlobalValue *GV = N->getGlobal();
3244
3245 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3246 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3247 CM = *GCM;
3248 }
3249
3250 return getAddr(N, DAG, CM, GV->isDSOLocal());
3251}
3252
3253SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3254 SelectionDAG &DAG,
3255 unsigned Opc, bool UseGOT,
3256 bool Large) const {
3257 SDLoc DL(N);
3258 EVT Ty = getPointerTy(DAG.getDataLayout());
3259 MVT GRLenVT = Subtarget.getGRLenVT();
3260
3261 // This is not actually used, but is necessary for successfully matching the
3262 // PseudoLA_*_LARGE nodes.
3263 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3264 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3265
3266 // Only IE needs an extra argument for large code model.
3267 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3268 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3269 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3270
3271 // If it is LE for normal/medium code model, the add tp operation will occur
3272 // during the pseudo-instruction expansion.
3273 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3274 return Offset;
3275
3276 if (UseGOT) {
3277 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3278 MachineFunction &MF = DAG.getMachineFunction();
3279 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3280 MachinePointerInfo::getGOT(MF),
3281 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3282 MachineMemOperand::MOInvariant,
3283 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3284 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3285 }
3286
3287 // Add the thread pointer.
3288 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3289 DAG.getRegister(LoongArch::R2, GRLenVT));
3290}
3291
3292SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3293 SelectionDAG &DAG,
3294 unsigned Opc,
3295 bool Large) const {
3296 SDLoc DL(N);
3297 EVT Ty = getPointerTy(DAG.getDataLayout());
3298 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3299
3300 // This is not actually used, but is necessary for successfully matching the
3301 // PseudoLA_*_LARGE nodes.
3302 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3303
3304 // Use a PC-relative addressing mode to access the dynamic GOT address.
3305 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3306 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3307 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3308
3309 // Prepare argument list to generate call.
3310 ArgListTy Args;
3311 Args.emplace_back(Load, CallTy);
3312
3313 // Setup call to __tls_get_addr.
3314 TargetLowering::CallLoweringInfo CLI(DAG);
3315 CLI.setDebugLoc(DL)
3316 .setChain(DAG.getEntryNode())
3317 .setLibCallee(CallingConv::C, CallTy,
3318 DAG.getExternalSymbol("__tls_get_addr", Ty),
3319 std::move(Args));
3320
3321 return LowerCallTo(CLI).first;
3322}
3323
3324SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3325 SelectionDAG &DAG, unsigned Opc,
3326 bool Large) const {
3327 SDLoc DL(N);
3328 EVT Ty = getPointerTy(DAG.getDataLayout());
3329 const GlobalValue *GV = N->getGlobal();
3330
3331 // This is not actually used, but is necessary for successfully matching the
3332 // PseudoLA_*_LARGE nodes.
3333 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3334
3335 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3336 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3337 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3338 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3339 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3340}
3341
3342SDValue
3343LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3344 SelectionDAG &DAG) const {
3345 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3346 CallingConv::GHC)
3347 report_fatal_error("In GHC calling convention TLS is not supported");
3348
3349 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3350 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3351
3352 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3353 assert(N->getOffset() == 0 && "unexpected offset in global node");
3354
3355 if (DAG.getTarget().useEmulatedTLS())
3356 reportFatalUsageError("the emulated TLS is prohibited");
3357
3358 bool IsDesc = DAG.getTarget().useTLSDESC();
3359
3360 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3361 case TLSModel::GeneralDynamic:
3362 // In this model, application code calls the dynamic linker function
3363 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3364 // runtime.
3365 if (!IsDesc)
3366 return getDynamicTLSAddr(N, DAG,
3367 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3368 : LoongArch::PseudoLA_TLS_GD,
3369 Large);
3370 break;
3371 case TLSModel::LocalDynamic:
3372 // Same as GeneralDynamic, except for assembly modifiers and relocation
3373 // records.
3374 if (!IsDesc)
3375 return getDynamicTLSAddr(N, DAG,
3376 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3377 : LoongArch::PseudoLA_TLS_LD,
3378 Large);
3379 break;
3380 case TLSModel::InitialExec:
3381 // This model uses the GOT to resolve TLS offsets.
3382 return getStaticTLSAddr(N, DAG,
3383 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3384 : LoongArch::PseudoLA_TLS_IE,
3385 /*UseGOT=*/true, Large);
3386 case TLSModel::LocalExec:
3387 // This model is used when statically linking, as the TLS offsets are
3388 // resolved during program linking.
3389 //
3390 // This node doesn't need an extra argument for the large code model.
3391 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3392 /*UseGOT=*/false, Large);
3393 }
3394
3395 return getTLSDescAddr(N, DAG,
3396 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3397 : LoongArch::PseudoLA_TLS_DESC,
3398 Large);
3399}
3400
3401template <unsigned N>
3402 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3403 SelectionDAG &DAG, bool IsSigned = false) {
3404 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3405 // Check the ImmArg.
3406 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3407 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3408 DAG.getContext()->emitError(Op->getOperationName(0) +
3409 ": argument out of range.");
3410 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3411 }
3412 return SDValue();
3413}
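// Illustrative use (hypothetical values, not from a test): vreplvei.w takes a
// 2-bit unsigned immediate, so checkIntrinsicImmArg<2>(Op, 2, DAG) accepts
// lane indices 0..3 and, for an out-of-range constant such as 4, emits an
// ": argument out of range." diagnostic and folds the intrinsic to UNDEF.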
3414
3415SDValue
3416LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3417 SelectionDAG &DAG) const {
3418 switch (Op.getConstantOperandVal(0)) {
3419 default:
3420 return SDValue(); // Don't custom lower most intrinsics.
3421 case Intrinsic::thread_pointer: {
3422 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3423 return DAG.getRegister(LoongArch::R2, PtrVT);
3424 }
3425 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3426 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3427 case Intrinsic::loongarch_lsx_vreplvei_d:
3428 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3429 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3430 case Intrinsic::loongarch_lsx_vreplvei_w:
3431 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3432 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3433 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3434 case Intrinsic::loongarch_lasx_xvpickve_d:
3435 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3436 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3437 case Intrinsic::loongarch_lasx_xvinsve0_d:
3438 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3439 case Intrinsic::loongarch_lsx_vsat_b:
3440 case Intrinsic::loongarch_lsx_vsat_bu:
3441 case Intrinsic::loongarch_lsx_vrotri_b:
3442 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3443 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3444 case Intrinsic::loongarch_lsx_vsrlri_b:
3445 case Intrinsic::loongarch_lsx_vsrari_b:
3446 case Intrinsic::loongarch_lsx_vreplvei_h:
3447 case Intrinsic::loongarch_lasx_xvsat_b:
3448 case Intrinsic::loongarch_lasx_xvsat_bu:
3449 case Intrinsic::loongarch_lasx_xvrotri_b:
3450 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3451 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3452 case Intrinsic::loongarch_lasx_xvsrlri_b:
3453 case Intrinsic::loongarch_lasx_xvsrari_b:
3454 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3455 case Intrinsic::loongarch_lasx_xvpickve_w:
3456 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3457 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3458 case Intrinsic::loongarch_lasx_xvinsve0_w:
3459 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3460 case Intrinsic::loongarch_lsx_vsat_h:
3461 case Intrinsic::loongarch_lsx_vsat_hu:
3462 case Intrinsic::loongarch_lsx_vrotri_h:
3463 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3464 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3465 case Intrinsic::loongarch_lsx_vsrlri_h:
3466 case Intrinsic::loongarch_lsx_vsrari_h:
3467 case Intrinsic::loongarch_lsx_vreplvei_b:
3468 case Intrinsic::loongarch_lasx_xvsat_h:
3469 case Intrinsic::loongarch_lasx_xvsat_hu:
3470 case Intrinsic::loongarch_lasx_xvrotri_h:
3471 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3472 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3473 case Intrinsic::loongarch_lasx_xvsrlri_h:
3474 case Intrinsic::loongarch_lasx_xvsrari_h:
3475 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3476 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3477 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3478 case Intrinsic::loongarch_lsx_vsrani_b_h:
3479 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3480 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3481 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3482 case Intrinsic::loongarch_lsx_vssrani_b_h:
3483 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3484 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3485 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3486 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3487 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3488 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3489 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3490 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3491 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3492 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3493 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3494 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3495 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3496 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3497 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3498 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3499 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3500 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3501 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3502 case Intrinsic::loongarch_lsx_vsat_w:
3503 case Intrinsic::loongarch_lsx_vsat_wu:
3504 case Intrinsic::loongarch_lsx_vrotri_w:
3505 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3506 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3507 case Intrinsic::loongarch_lsx_vsrlri_w:
3508 case Intrinsic::loongarch_lsx_vsrari_w:
3509 case Intrinsic::loongarch_lsx_vslei_bu:
3510 case Intrinsic::loongarch_lsx_vslei_hu:
3511 case Intrinsic::loongarch_lsx_vslei_wu:
3512 case Intrinsic::loongarch_lsx_vslei_du:
3513 case Intrinsic::loongarch_lsx_vslti_bu:
3514 case Intrinsic::loongarch_lsx_vslti_hu:
3515 case Intrinsic::loongarch_lsx_vslti_wu:
3516 case Intrinsic::loongarch_lsx_vslti_du:
3517 case Intrinsic::loongarch_lsx_vbsll_v:
3518 case Intrinsic::loongarch_lsx_vbsrl_v:
3519 case Intrinsic::loongarch_lasx_xvsat_w:
3520 case Intrinsic::loongarch_lasx_xvsat_wu:
3521 case Intrinsic::loongarch_lasx_xvrotri_w:
3522 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3523 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3524 case Intrinsic::loongarch_lasx_xvsrlri_w:
3525 case Intrinsic::loongarch_lasx_xvsrari_w:
3526 case Intrinsic::loongarch_lasx_xvslei_bu:
3527 case Intrinsic::loongarch_lasx_xvslei_hu:
3528 case Intrinsic::loongarch_lasx_xvslei_wu:
3529 case Intrinsic::loongarch_lasx_xvslei_du:
3530 case Intrinsic::loongarch_lasx_xvslti_bu:
3531 case Intrinsic::loongarch_lasx_xvslti_hu:
3532 case Intrinsic::loongarch_lasx_xvslti_wu:
3533 case Intrinsic::loongarch_lasx_xvslti_du:
3534 case Intrinsic::loongarch_lasx_xvbsll_v:
3535 case Intrinsic::loongarch_lasx_xvbsrl_v:
3536 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3537 case Intrinsic::loongarch_lsx_vseqi_b:
3538 case Intrinsic::loongarch_lsx_vseqi_h:
3539 case Intrinsic::loongarch_lsx_vseqi_w:
3540 case Intrinsic::loongarch_lsx_vseqi_d:
3541 case Intrinsic::loongarch_lsx_vslei_b:
3542 case Intrinsic::loongarch_lsx_vslei_h:
3543 case Intrinsic::loongarch_lsx_vslei_w:
3544 case Intrinsic::loongarch_lsx_vslei_d:
3545 case Intrinsic::loongarch_lsx_vslti_b:
3546 case Intrinsic::loongarch_lsx_vslti_h:
3547 case Intrinsic::loongarch_lsx_vslti_w:
3548 case Intrinsic::loongarch_lsx_vslti_d:
3549 case Intrinsic::loongarch_lasx_xvseqi_b:
3550 case Intrinsic::loongarch_lasx_xvseqi_h:
3551 case Intrinsic::loongarch_lasx_xvseqi_w:
3552 case Intrinsic::loongarch_lasx_xvseqi_d:
3553 case Intrinsic::loongarch_lasx_xvslei_b:
3554 case Intrinsic::loongarch_lasx_xvslei_h:
3555 case Intrinsic::loongarch_lasx_xvslei_w:
3556 case Intrinsic::loongarch_lasx_xvslei_d:
3557 case Intrinsic::loongarch_lasx_xvslti_b:
3558 case Intrinsic::loongarch_lasx_xvslti_h:
3559 case Intrinsic::loongarch_lasx_xvslti_w:
3560 case Intrinsic::loongarch_lasx_xvslti_d:
3561 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3562 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3563 case Intrinsic::loongarch_lsx_vsrani_h_w:
3564 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3565 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3566 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3567 case Intrinsic::loongarch_lsx_vssrani_h_w:
3568 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3569 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3570 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3571 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3572 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3573 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3574 case Intrinsic::loongarch_lsx_vfrstpi_b:
3575 case Intrinsic::loongarch_lsx_vfrstpi_h:
3576 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3577 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3578 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3579 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3580 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3581 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3582 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3583 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3584 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3585 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3586 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3587 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3588 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3589 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3590 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3591 case Intrinsic::loongarch_lsx_vsat_d:
3592 case Intrinsic::loongarch_lsx_vsat_du:
3593 case Intrinsic::loongarch_lsx_vrotri_d:
3594 case Intrinsic::loongarch_lsx_vsrlri_d:
3595 case Intrinsic::loongarch_lsx_vsrari_d:
3596 case Intrinsic::loongarch_lasx_xvsat_d:
3597 case Intrinsic::loongarch_lasx_xvsat_du:
3598 case Intrinsic::loongarch_lasx_xvrotri_d:
3599 case Intrinsic::loongarch_lasx_xvsrlri_d:
3600 case Intrinsic::loongarch_lasx_xvsrari_d:
3601 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3602 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3603 case Intrinsic::loongarch_lsx_vsrani_w_d:
3604 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3605 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3606 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3607 case Intrinsic::loongarch_lsx_vssrani_w_d:
3608 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3609 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3610 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3611 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3612 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3613 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3614 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3615 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3616 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3617 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3618 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3619 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3620 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3621 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3622 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3623 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3624 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3625 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3626 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3627 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3628 case Intrinsic::loongarch_lsx_vsrani_d_q:
3629 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3630 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3631 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3632 case Intrinsic::loongarch_lsx_vssrani_d_q:
3633 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3634 case Intrinsic::loongarch_lsx_vssrani_du_q:
3635 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3636 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3637 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3638 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3639 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3640 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3641 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3642 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3643 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3644 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3645 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3646 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3647 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3648 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3649 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3650 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3651 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3652 case Intrinsic::loongarch_lsx_vnori_b:
3653 case Intrinsic::loongarch_lsx_vshuf4i_b:
3654 case Intrinsic::loongarch_lsx_vshuf4i_h:
3655 case Intrinsic::loongarch_lsx_vshuf4i_w:
3656 case Intrinsic::loongarch_lasx_xvnori_b:
3657 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3658 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3659 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3660 case Intrinsic::loongarch_lasx_xvpermi_d:
3661 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3662 case Intrinsic::loongarch_lsx_vshuf4i_d:
3663 case Intrinsic::loongarch_lsx_vpermi_w:
3664 case Intrinsic::loongarch_lsx_vbitseli_b:
3665 case Intrinsic::loongarch_lsx_vextrins_b:
3666 case Intrinsic::loongarch_lsx_vextrins_h:
3667 case Intrinsic::loongarch_lsx_vextrins_w:
3668 case Intrinsic::loongarch_lsx_vextrins_d:
3669 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3670 case Intrinsic::loongarch_lasx_xvpermi_w:
3671 case Intrinsic::loongarch_lasx_xvpermi_q:
3672 case Intrinsic::loongarch_lasx_xvbitseli_b:
3673 case Intrinsic::loongarch_lasx_xvextrins_b:
3674 case Intrinsic::loongarch_lasx_xvextrins_h:
3675 case Intrinsic::loongarch_lasx_xvextrins_w:
3676 case Intrinsic::loongarch_lasx_xvextrins_d:
3677 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3678 case Intrinsic::loongarch_lsx_vrepli_b:
3679 case Intrinsic::loongarch_lsx_vrepli_h:
3680 case Intrinsic::loongarch_lsx_vrepli_w:
3681 case Intrinsic::loongarch_lsx_vrepli_d:
3682 case Intrinsic::loongarch_lasx_xvrepli_b:
3683 case Intrinsic::loongarch_lasx_xvrepli_h:
3684 case Intrinsic::loongarch_lasx_xvrepli_w:
3685 case Intrinsic::loongarch_lasx_xvrepli_d:
3686 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3687 case Intrinsic::loongarch_lsx_vldi:
3688 case Intrinsic::loongarch_lasx_xvldi:
3689 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3690 }
3691}
3692
3693 // Helper function that emits an error message for intrinsics with a chain and
3694 // returns merge values of a UNDEF and the chain.
3695 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3696 StringRef ErrorMsg,
3697 SelectionDAG &DAG) {
3698 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3699 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3700 SDLoc(Op));
3701}
3702
3703SDValue
3704LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3705 SelectionDAG &DAG) const {
3706 SDLoc DL(Op);
3707 MVT GRLenVT = Subtarget.getGRLenVT();
3708 EVT VT = Op.getValueType();
3709 SDValue Chain = Op.getOperand(0);
3710 const StringRef ErrorMsgOOR = "argument out of range";
3711 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3712 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3713
3714 switch (Op.getConstantOperandVal(1)) {
3715 default:
3716 return Op;
3717 case Intrinsic::loongarch_crc_w_b_w:
3718 case Intrinsic::loongarch_crc_w_h_w:
3719 case Intrinsic::loongarch_crc_w_w_w:
3720 case Intrinsic::loongarch_crc_w_d_w:
3721 case Intrinsic::loongarch_crcc_w_b_w:
3722 case Intrinsic::loongarch_crcc_w_h_w:
3723 case Intrinsic::loongarch_crcc_w_w_w:
3724 case Intrinsic::loongarch_crcc_w_d_w:
3725 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3726 case Intrinsic::loongarch_csrrd_w:
3727 case Intrinsic::loongarch_csrrd_d: {
3728 unsigned Imm = Op.getConstantOperandVal(2);
3729 return !isUInt<14>(Imm)
3730 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3731 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3732 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3733 }
3734 case Intrinsic::loongarch_csrwr_w:
3735 case Intrinsic::loongarch_csrwr_d: {
3736 unsigned Imm = Op.getConstantOperandVal(3);
3737 return !isUInt<14>(Imm)
3738 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3739 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3740 {Chain, Op.getOperand(2),
3741 DAG.getConstant(Imm, DL, GRLenVT)});
3742 }
3743 case Intrinsic::loongarch_csrxchg_w:
3744 case Intrinsic::loongarch_csrxchg_d: {
3745 unsigned Imm = Op.getConstantOperandVal(4);
3746 return !isUInt<14>(Imm)
3747 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3748 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3749 {Chain, Op.getOperand(2), Op.getOperand(3),
3750 DAG.getConstant(Imm, DL, GRLenVT)});
3751 }
3752 case Intrinsic::loongarch_iocsrrd_d: {
3753 return DAG.getNode(
3754 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3755 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3756 }
3757#define IOCSRRD_CASE(NAME, NODE) \
3758 case Intrinsic::loongarch_##NAME: { \
3759 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3760 {Chain, Op.getOperand(2)}); \
3761 }
3762 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3763 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3764 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3765#undef IOCSRRD_CASE
3766 case Intrinsic::loongarch_cpucfg: {
3767 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3768 {Chain, Op.getOperand(2)});
3769 }
3770 case Intrinsic::loongarch_lddir_d: {
3771 unsigned Imm = Op.getConstantOperandVal(3);
3772 return !isUInt<8>(Imm)
3773 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3774 : Op;
3775 }
3776 case Intrinsic::loongarch_movfcsr2gr: {
3777 if (!Subtarget.hasBasicF())
3778 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3779 unsigned Imm = Op.getConstantOperandVal(2);
3780 return !isUInt<2>(Imm)
3781 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3782 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3783 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3784 }
3785 case Intrinsic::loongarch_lsx_vld:
3786 case Intrinsic::loongarch_lsx_vldrepl_b:
3787 case Intrinsic::loongarch_lasx_xvld:
3788 case Intrinsic::loongarch_lasx_xvldrepl_b:
3789 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3790 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3791 : SDValue();
3792 case Intrinsic::loongarch_lsx_vldrepl_h:
3793 case Intrinsic::loongarch_lasx_xvldrepl_h:
3794 return !isShiftedInt<11, 1>(
3795 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3796 ? emitIntrinsicWithChainErrorMessage(
3797 Op, "argument out of range or not a multiple of 2", DAG)
3798 : SDValue();
3799 case Intrinsic::loongarch_lsx_vldrepl_w:
3800 case Intrinsic::loongarch_lasx_xvldrepl_w:
3801 return !isShiftedInt<10, 2>(
3802 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3803 ? emitIntrinsicWithChainErrorMessage(
3804 Op, "argument out of range or not a multiple of 4", DAG)
3805 : SDValue();
3806 case Intrinsic::loongarch_lsx_vldrepl_d:
3807 case Intrinsic::loongarch_lasx_xvldrepl_d:
3808 return !isShiftedInt<9, 3>(
3809 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3810 ? emitIntrinsicWithChainErrorMessage(
3811 Op, "argument out of range or not a multiple of 8", DAG)
3812 : SDValue();
3813 }
3814}
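// ---- Illustrative sketch (editorial addition, not part of this file) ----
// The vldrepl/xvldrepl offset checks above use llvm::isShiftedInt<B, S>: the
// offset must be a multiple of 2^S and, once divided by 2^S, fit in a signed
// B-bit field. A standalone equivalent with made-up names:
#include <cstdint>
constexpr bool fitsShiftedSImm(int64_t V, unsigned B, unsigned S) {
  const int64_t Unit = int64_t(1) << S;
  const int64_t Lo = -(int64_t(1) << (B - 1)) * Unit;
  const int64_t Hi = ((int64_t(1) << (B - 1)) - 1) * Unit;
  return V % Unit == 0 && V >= Lo && V <= Hi;
}
static_assert(fitsShiftedSImm(-2048, 11, 1), "vldrepl.h accepts si11 << 1");
static_assert(!fitsShiftedSImm(3, 11, 1), "offset must be a multiple of 2");
static_assert(!fitsShiftedSImm(2047, 11, 1), "2047 is in range but not even");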
3815
3816 // Helper function that emits an error message for intrinsics with a void return
3817 // value and returns the chain.
3818 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3819 SelectionDAG &DAG) {
3820
3821 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3822 return Op.getOperand(0);
3823}
3824
3825SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3826 SelectionDAG &DAG) const {
3827 SDLoc DL(Op);
3828 MVT GRLenVT = Subtarget.getGRLenVT();
3829 SDValue Chain = Op.getOperand(0);
3830 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3831 SDValue Op2 = Op.getOperand(2);
3832 const StringRef ErrorMsgOOR = "argument out of range";
3833 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3834 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3835 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3836
3837 switch (IntrinsicEnum) {
3838 default:
3839 // TODO: Add more Intrinsics.
3840 return SDValue();
3841 case Intrinsic::loongarch_cacop_d:
3842 case Intrinsic::loongarch_cacop_w: {
3843 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3844 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3845 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3846 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3847 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3848 unsigned Imm1 = Op2->getAsZExtVal();
3849 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3850 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3851 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3852 return Op;
3853 }
3854 case Intrinsic::loongarch_dbar: {
3855 unsigned Imm = Op2->getAsZExtVal();
3856 return !isUInt<15>(Imm)
3857 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3858 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3859 DAG.getConstant(Imm, DL, GRLenVT));
3860 }
3861 case Intrinsic::loongarch_ibar: {
3862 unsigned Imm = Op2->getAsZExtVal();
3863 return !isUInt<15>(Imm)
3864 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3865 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3866 DAG.getConstant(Imm, DL, GRLenVT));
3867 }
3868 case Intrinsic::loongarch_break: {
3869 unsigned Imm = Op2->getAsZExtVal();
3870 return !isUInt<15>(Imm)
3871 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3872 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3873 DAG.getConstant(Imm, DL, GRLenVT));
3874 }
3875 case Intrinsic::loongarch_movgr2fcsr: {
3876 if (!Subtarget.hasBasicF())
3877 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3878 unsigned Imm = Op2->getAsZExtVal();
3879 return !isUInt<2>(Imm)
3880 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3881 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3882 DAG.getConstant(Imm, DL, GRLenVT),
3883 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3884 Op.getOperand(3)));
3885 }
3886 case Intrinsic::loongarch_syscall: {
3887 unsigned Imm = Op2->getAsZExtVal();
3888 return !isUInt<15>(Imm)
3889 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3890 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3891 DAG.getConstant(Imm, DL, GRLenVT));
3892 }
3893#define IOCSRWR_CASE(NAME, NODE) \
3894 case Intrinsic::loongarch_##NAME: { \
3895 SDValue Op3 = Op.getOperand(3); \
3896 return Subtarget.is64Bit() \
3897 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
3898 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3899 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
3900 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
3901 Op3); \
3902 }
3903 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3904 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3905 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3906#undef IOCSRWR_CASE
3907 case Intrinsic::loongarch_iocsrwr_d: {
3908 return !Subtarget.is64Bit()
3909 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3910 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
3911 Op2,
3912 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3913 Op.getOperand(3)));
3914 }
3915#define ASRT_LE_GT_CASE(NAME) \
3916 case Intrinsic::loongarch_##NAME: { \
3917 return !Subtarget.is64Bit() \
3918 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
3919 : Op; \
3920 }
3921 ASRT_LE_GT_CASE(asrtle_d)
3922 ASRT_LE_GT_CASE(asrtgt_d)
3923#undef ASRT_LE_GT_CASE
3924 case Intrinsic::loongarch_ldpte_d: {
3925 unsigned Imm = Op.getConstantOperandVal(3);
3926 return !Subtarget.is64Bit()
3927 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3928 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3929 : Op;
3930 }
3931 case Intrinsic::loongarch_lsx_vst:
3932 case Intrinsic::loongarch_lasx_xvst:
3933 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
3934 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3935 : SDValue();
3936 case Intrinsic::loongarch_lasx_xvstelm_b:
3937 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3938 !isUInt<5>(Op.getConstantOperandVal(5)))
3939 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3940 : SDValue();
3941 case Intrinsic::loongarch_lsx_vstelm_b:
3942 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3943 !isUInt<4>(Op.getConstantOperandVal(5)))
3944 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3945 : SDValue();
3946 case Intrinsic::loongarch_lasx_xvstelm_h:
3947 return (!isShiftedInt<8, 1>(
3948 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3949 !isUInt<4>(Op.getConstantOperandVal(5)))
3950 ? emitIntrinsicErrorMessage(
3951 Op, "argument out of range or not a multiple of 2", DAG)
3952 : SDValue();
3953 case Intrinsic::loongarch_lsx_vstelm_h:
3954 return (!isShiftedInt<8, 1>(
3955 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3956 !isUInt<3>(Op.getConstantOperandVal(5)))
3957 ? emitIntrinsicErrorMessage(
3958 Op, "argument out of range or not a multiple of 2", DAG)
3959 : SDValue();
3960 case Intrinsic::loongarch_lasx_xvstelm_w:
3961 return (!isShiftedInt<8, 2>(
3962 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3963 !isUInt<3>(Op.getConstantOperandVal(5)))
3964 ? emitIntrinsicErrorMessage(
3965 Op, "argument out of range or not a multiple of 4", DAG)
3966 : SDValue();
3967 case Intrinsic::loongarch_lsx_vstelm_w:
3968 return (!isShiftedInt<8, 2>(
3969 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3970 !isUInt<2>(Op.getConstantOperandVal(5)))
3971 ? emitIntrinsicErrorMessage(
3972 Op, "argument out of range or not a multiple of 4", DAG)
3973 : SDValue();
3974 case Intrinsic::loongarch_lasx_xvstelm_d:
3975 return (!isShiftedInt<8, 3>(
3976 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3977 !isUInt<2>(Op.getConstantOperandVal(5)))
3978 ? emitIntrinsicErrorMessage(
3979 Op, "argument out of range or not a multiple of 8", DAG)
3980 : SDValue();
3981 case Intrinsic::loongarch_lsx_vstelm_d:
3982 return (!isShiftedInt<8, 3>(
3983 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3984 !isUInt<1>(Op.getConstantOperandVal(5)))
3985 ? emitIntrinsicErrorMessage(
3986 Op, "argument out of range or not a multiple of 8", DAG)
3987 : SDValue();
3988 }
3989}
3990
3991SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
3992 SelectionDAG &DAG) const {
3993 SDLoc DL(Op);
3994 SDValue Lo = Op.getOperand(0);
3995 SDValue Hi = Op.getOperand(1);
3996 SDValue Shamt = Op.getOperand(2);
3997 EVT VT = Lo.getValueType();
3998
3999 // if Shamt-GRLen < 0: // Shamt < GRLen
4000 // Lo = Lo << Shamt
4001 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4002 // else:
4003 // Lo = 0
4004 // Hi = Lo << (Shamt-GRLen)
4005
4006 SDValue Zero = DAG.getConstant(0, DL, VT);
4007 SDValue One = DAG.getConstant(1, DL, VT);
4008 SDValue MinusGRLen =
4009 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4010 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4011 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4012 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4013
4014 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4015 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4016 SDValue ShiftRightLo =
4017 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4018 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4019 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4020 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4021
4022 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4023
4024 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4025 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4026
4027 SDValue Parts[2] = {Lo, Hi};
4028 return DAG.getMergeValues(Parts, DL);
4029}
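// ---- Illustrative sketch (editorial addition, not part of this file) ----
// The expansion above is the usual double-word shift-left. The same formula
// applied to the two 64-bit halves of a 128-bit value (GRLen == 64), with
// made-up names:
#include <cstdint>
static void shl128(uint64_t &Lo, uint64_t &Hi, unsigned Shamt) {
  Shamt &= 127;
  if (Shamt < 64) { // Shamt - GRLen < 0
    // (Lo >> 1) >> (63 ^ Shamt) equals Lo >> (64 - Shamt) for Shamt in [1,63]
    // and contributes nothing for Shamt == 0, mirroring the GRLen-1 ^ Shamt
    // trick used above to avoid an out-of-range single shift.
    Hi = (Hi << Shamt) | ((Lo >> 1) >> (63 ^ Shamt));
    Lo <<= Shamt;
  } else {
    Hi = Lo << (Shamt - 64);
    Lo = 0;
  }
}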
4030
4031SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4032 SelectionDAG &DAG,
4033 bool IsSRA) const {
4034 SDLoc DL(Op);
4035 SDValue Lo = Op.getOperand(0);
4036 SDValue Hi = Op.getOperand(1);
4037 SDValue Shamt = Op.getOperand(2);
4038 EVT VT = Lo.getValueType();
4039
4040 // SRA expansion:
4041 // if Shamt-GRLen < 0: // Shamt < GRLen
4042 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4043 // Hi = Hi >>s Shamt
4044 // else:
4045 // Lo = Hi >>s (Shamt-GRLen);
4046 // Hi = Hi >>s (GRLen-1)
4047 //
4048 // SRL expansion:
4049 // if Shamt-GRLen < 0: // Shamt < GRLen
4050 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4051 // Hi = Hi >>u Shamt
4052 // else:
4053 // Lo = Hi >>u (Shamt-GRLen);
4054 // Hi = 0;
4055
4056 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4057
4058 SDValue Zero = DAG.getConstant(0, DL, VT);
4059 SDValue One = DAG.getConstant(1, DL, VT);
4060 SDValue MinusGRLen =
4061 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4062 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4063 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4064 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4065
4066 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4067 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4068 SDValue ShiftLeftHi =
4069 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4070 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4071 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4072 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4073 SDValue HiFalse =
4074 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4075
4076 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4077
4078 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4079 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4080
4081 SDValue Parts[2] = {Lo, Hi};
4082 return DAG.getMergeValues(Parts, DL);
4083}
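// ---- Illustrative sketch (editorial addition, not part of this file) ----
// The matching double-word logical shift-right, following the SRL expansion
// commented above; the SRA form only differs in how Hi is shifted and in the
// fallback Hi value. GRLen == 64 is assumed and the names are made up.
#include <cstdint>
static void srl128(uint64_t &Lo, uint64_t &Hi, unsigned Shamt) {
  Shamt &= 127;
  if (Shamt < 64) { // Shamt - GRLen < 0
    Lo = (Lo >> Shamt) | ((Hi << 1) << (63 ^ Shamt));
    Hi >>= Shamt;
  } else {
    Lo = Hi >> (Shamt - 64);
    Hi = 0;
  }
}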
4084
4085// Returns the opcode of the target-specific SDNode that implements the 32-bit
4086 // form of the given Opcode.
4087 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4088 switch (Opcode) {
4089 default:
4090 llvm_unreachable("Unexpected opcode");
4091 case ISD::SDIV:
4092 return LoongArchISD::DIV_W;
4093 case ISD::UDIV:
4094 return LoongArchISD::DIV_WU;
4095 case ISD::SREM:
4096 return LoongArchISD::MOD_W;
4097 case ISD::UREM:
4098 return LoongArchISD::MOD_WU;
4099 case ISD::SHL:
4100 return LoongArchISD::SLL_W;
4101 case ISD::SRA:
4102 return LoongArchISD::SRA_W;
4103 case ISD::SRL:
4104 return LoongArchISD::SRL_W;
4105 case ISD::ROTL:
4106 case ISD::ROTR:
4107 return LoongArchISD::ROTR_W;
4108 case ISD::CTTZ:
4109 return LoongArchISD::CTZ_W;
4110 case ISD::CTLZ:
4111 return LoongArchISD::CLZ_W;
4112 }
4113}
4114
4115// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4116// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4117// otherwise be promoted to i64, making it difficult to select the
4118 // SLL_W/.../*W later on, because the fact that the operation was originally of
4119 // type i8/i16/i32 is lost.
4120 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4121 unsigned ExtOpc = ISD::ANY_EXTEND) {
4122 SDLoc DL(N);
4123 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4124 SDValue NewOp0, NewRes;
4125
4126 switch (NumOp) {
4127 default:
4128 llvm_unreachable("Unexpected NumOp");
4129 case 1: {
4130 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4131 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4132 break;
4133 }
4134 case 2: {
4135 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4136 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4137 if (N->getOpcode() == ISD::ROTL) {
4138 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4139 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4140 }
4141 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4142 break;
4143 }
4144 // TODO: Handle more NumOp.
4145 }
4146
4147 // ReplaceNodeResults requires we maintain the same type for the return
4148 // value.
4149 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4150}
4151
4152 // Converts the given 32-bit operation to an i64 operation with sign-extension
4153 // semantics to reduce the number of sign-extension instructions.
4154 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4155 SDLoc DL(N);
4156 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4157 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4158 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4159 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4160 DAG.getValueType(MVT::i32));
4161 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4162}
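// ---- Illustrative sketch (editorial addition, not part of this file) ----
// The SIGN_EXTEND_INREG above encodes LA64's convention that 32-bit results
// are kept sign-extended in 64-bit registers, as ADD.W/SUB.W already produce.
// In plain C++ terms (made-up name):
#include <cstdint>
static int64_t add_w(int64_t A, int64_t B) {
  // Add, keep the low 32 bits, then sign-extend them back to 64 bits.
  return static_cast<int32_t>(static_cast<uint32_t>(A) +
                              static_cast<uint32_t>(B));
}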
4163
4164 // Helper function that emits an error message for intrinsics with or without a
4165 // chain and pushes a UNDEF and (when requested) the chain as the results.
4166 static void emitErrorAndReplaceIntrinsicResults(
4167 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4168 StringRef ErrorMsg, bool WithChain = true) {
4169 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4170 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4171 if (!WithChain)
4172 return;
4173 Results.push_back(N->getOperand(0));
4174}
4175
4176template <unsigned N>
4177 static void
4178 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4179 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4180 unsigned ResOp) {
4181 const StringRef ErrorMsgOOR = "argument out of range";
4182 unsigned Imm = Node->getConstantOperandVal(2);
4183 if (!isUInt<N>(Imm)) {
4184 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4185 /*WithChain=*/false);
4186 return;
4187 }
4188 SDLoc DL(Node);
4189 SDValue Vec = Node->getOperand(1);
4190
4191 SDValue PickElt =
4192 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4193 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4194 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4195 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4196 PickElt.getValue(0)));
4197}
4198
4199 static void replaceVecCondBranchResults(SDNode *N,
4200 SmallVectorImpl<SDValue> &Results,
4201 SelectionDAG &DAG,
4202 const LoongArchSubtarget &Subtarget,
4203 unsigned ResOp) {
4204 SDLoc DL(N);
4205 SDValue Vec = N->getOperand(1);
4206
4207 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4208 Results.push_back(
4209 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4210}
4211
4212 static void
4213 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4214 SelectionDAG &DAG,
4215 const LoongArchSubtarget &Subtarget) {
4216 switch (N->getConstantOperandVal(0)) {
4217 default:
4218 llvm_unreachable("Unexpected Intrinsic.");
4219 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4220 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4221 LoongArchISD::VPICK_SEXT_ELT);
4222 break;
4223 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4224 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4225 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4226 LoongArchISD::VPICK_SEXT_ELT);
4227 break;
4228 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4229 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4230 LoongArchISD::VPICK_SEXT_ELT);
4231 break;
4232 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4233 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4234 LoongArchISD::VPICK_ZEXT_ELT);
4235 break;
4236 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4237 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4238 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4239 LoongArchISD::VPICK_ZEXT_ELT);
4240 break;
4241 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4242 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4243 LoongArchISD::VPICK_ZEXT_ELT);
4244 break;
4245 case Intrinsic::loongarch_lsx_bz_b:
4246 case Intrinsic::loongarch_lsx_bz_h:
4247 case Intrinsic::loongarch_lsx_bz_w:
4248 case Intrinsic::loongarch_lsx_bz_d:
4249 case Intrinsic::loongarch_lasx_xbz_b:
4250 case Intrinsic::loongarch_lasx_xbz_h:
4251 case Intrinsic::loongarch_lasx_xbz_w:
4252 case Intrinsic::loongarch_lasx_xbz_d:
4253 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4255 break;
4256 case Intrinsic::loongarch_lsx_bz_v:
4257 case Intrinsic::loongarch_lasx_xbz_v:
4258 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4260 break;
4261 case Intrinsic::loongarch_lsx_bnz_b:
4262 case Intrinsic::loongarch_lsx_bnz_h:
4263 case Intrinsic::loongarch_lsx_bnz_w:
4264 case Intrinsic::loongarch_lsx_bnz_d:
4265 case Intrinsic::loongarch_lasx_xbnz_b:
4266 case Intrinsic::loongarch_lasx_xbnz_h:
4267 case Intrinsic::loongarch_lasx_xbnz_w:
4268 case Intrinsic::loongarch_lasx_xbnz_d:
4269 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4271 break;
4272 case Intrinsic::loongarch_lsx_bnz_v:
4273 case Intrinsic::loongarch_lasx_xbnz_v:
4274 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4276 break;
4277 }
4278}
4279
4280 static void replaceCMP_SWAP_128Results(SDNode *N,
4281 SmallVectorImpl<SDValue> &Results,
4282 SelectionDAG &DAG) {
4283 assert(N->getValueType(0) == MVT::i128 &&
4284 "AtomicCmpSwap on types less than 128 should be legal");
4285 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4286
4287 unsigned Opcode;
4288 switch (MemOp->getMergedOrdering()) {
4289 case AtomicOrdering::Acquire:
4290 case AtomicOrdering::AcquireRelease:
4291 case AtomicOrdering::SequentiallyConsistent:
4292 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4293 break;
4294 case AtomicOrdering::Monotonic:
4295 case AtomicOrdering::Release:
4296 Opcode = LoongArch::PseudoCmpXchg128;
4297 break;
4298 default:
4299 llvm_unreachable("Unexpected ordering!");
4300 }
4301
4302 SDLoc DL(N);
4303 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4304 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4305 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4306 NewVal.first, NewVal.second, N->getOperand(0)};
4307
4308 SDNode *CmpSwap = DAG.getMachineNode(
4309 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4310 Ops);
4311 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4312 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4313 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4314 Results.push_back(SDValue(CmpSwap, 3));
4315}
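// ---- Illustrative sketch (editorial addition, not part of this file) ----
// SplitScalar/BUILD_PAIR above simply move between an i128 value and its two
// i64 halves (low half first) around the cmpxchg pseudo. The same round trip
// in plain C++, assuming the compiler provides unsigned __int128 (made-up
// names):
#include <cstdint>
#include <utility>
static std::pair<uint64_t, uint64_t> split_u128(unsigned __int128 V) {
  return {static_cast<uint64_t>(V), static_cast<uint64_t>(V >> 64)}; // {lo, hi}
}
static unsigned __int128 build_u128(uint64_t Lo, uint64_t Hi) {
  return (static_cast<unsigned __int128>(Hi) << 64) | Lo;
}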
4316
4317 void LoongArchTargetLowering::ReplaceNodeResults(
4318 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4319 SDLoc DL(N);
4320 EVT VT = N->getValueType(0);
4321 switch (N->getOpcode()) {
4322 default:
4323 llvm_unreachable("Don't know how to legalize this operation");
4324 case ISD::ADD:
4325 case ISD::SUB:
4326 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4327 "Unexpected custom legalisation");
4328 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4329 break;
4330 case ISD::SDIV:
4331 case ISD::UDIV:
4332 case ISD::SREM:
4333 case ISD::UREM:
4334 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4335 "Unexpected custom legalisation");
4336 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4337 Subtarget.hasDiv32() && VT == MVT::i32
4338 ? ISD::ANY_EXTEND
4339 : ISD::SIGN_EXTEND));
4340 break;
4341 case ISD::SHL:
4342 case ISD::SRA:
4343 case ISD::SRL:
4344 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4345 "Unexpected custom legalisation");
4346 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4347 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4348 break;
4349 }
4350 break;
4351 case ISD::ROTL:
4352 case ISD::ROTR:
4353 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4354 "Unexpected custom legalisation");
4355 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4356 break;
4357 case ISD::FP_TO_SINT: {
4358 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4359 "Unexpected custom legalisation");
4360 SDValue Src = N->getOperand(0);
4361 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4362 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4363 TargetLowering::TypeSoftenFloat) {
4364 if (!isTypeLegal(Src.getValueType()))
4365 return;
4366 if (Src.getValueType() == MVT::f16)
4367 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4368 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4369 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4370 return;
4371 }
4372 // If the FP type needs to be softened, emit a library call using the 'si'
4373 // version. If we left it to default legalization we'd end up with 'di'.
4374 RTLIB::Libcall LC;
4375 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4376 MakeLibCallOptions CallOptions;
4377 EVT OpVT = Src.getValueType();
4378 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4379 SDValue Chain = SDValue();
4380 SDValue Result;
4381 std::tie(Result, Chain) =
4382 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4383 Results.push_back(Result);
4384 break;
4385 }
4386 case ISD::BITCAST: {
4387 SDValue Src = N->getOperand(0);
4388 EVT SrcVT = Src.getValueType();
4389 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4390 Subtarget.hasBasicF()) {
4391 SDValue Dst =
4392 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4393 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4394 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4396 DAG.getVTList(MVT::i32, MVT::i32), Src);
4397 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4398 NewReg.getValue(0), NewReg.getValue(1));
4399 Results.push_back(RetReg);
4400 }
4401 break;
4402 }
4403 case ISD::FP_TO_UINT: {
4404 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4405 "Unexpected custom legalisation");
4406 auto &TLI = DAG.getTargetLoweringInfo();
4407 SDValue Tmp1, Tmp2;
4408 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4409 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4410 break;
4411 }
4412 case ISD::BSWAP: {
4413 SDValue Src = N->getOperand(0);
4414 assert((VT == MVT::i16 || VT == MVT::i32) &&
4415 "Unexpected custom legalization");
4416 MVT GRLenVT = Subtarget.getGRLenVT();
4417 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4418 SDValue Tmp;
4419 switch (VT.getSizeInBits()) {
4420 default:
4421 llvm_unreachable("Unexpected operand width");
4422 case 16:
4423 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4424 break;
4425 case 32:
4426 // Only LA64 will get to here due to the size mismatch between VT and
4427 // GRLenVT; LA32 lowering is directly defined in LoongArchInstrInfo.
4428 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4429 break;
4430 }
4431 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4432 break;
4433 }
4434 case ISD::BITREVERSE: {
4435 SDValue Src = N->getOperand(0);
4436 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4437 "Unexpected custom legalization");
4438 MVT GRLenVT = Subtarget.getGRLenVT();
4439 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4440 SDValue Tmp;
4441 switch (VT.getSizeInBits()) {
4442 default:
4443 llvm_unreachable("Unexpected operand width");
4444 case 8:
4445 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4446 break;
4447 case 32:
4448 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4449 break;
4450 }
4451 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4452 break;
4453 }
4454 case ISD::CTLZ:
4455 case ISD::CTTZ: {
4456 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4457 "Unexpected custom legalisation");
4458 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4459 break;
4460 }
4461 case ISD::INTRINSIC_W_CHAIN: {
4462 SDValue Chain = N->getOperand(0);
4463 SDValue Op2 = N->getOperand(2);
4464 MVT GRLenVT = Subtarget.getGRLenVT();
4465 const StringRef ErrorMsgOOR = "argument out of range";
4466 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4467 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4468
4469 switch (N->getConstantOperandVal(1)) {
4470 default:
4471 llvm_unreachable("Unexpected Intrinsic.");
4472 case Intrinsic::loongarch_movfcsr2gr: {
4473 if (!Subtarget.hasBasicF()) {
4474 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4475 return;
4476 }
4477 unsigned Imm = Op2->getAsZExtVal();
4478 if (!isUInt<2>(Imm)) {
4479 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4480 return;
4481 }
4482 SDValue MOVFCSR2GRResults = DAG.getNode(
4483 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4484 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4485 Results.push_back(
4486 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4487 Results.push_back(MOVFCSR2GRResults.getValue(1));
4488 break;
4489 }
4490#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4491 case Intrinsic::loongarch_##NAME: { \
4492 SDValue NODE = DAG.getNode( \
4493 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4494 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4495 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4496 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4497 Results.push_back(NODE.getValue(1)); \
4498 break; \
4499 }
4500 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4501 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4502 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4503 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4504 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4505 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4506#undef CRC_CASE_EXT_BINARYOP
4507
4508#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4509 case Intrinsic::loongarch_##NAME: { \
4510 SDValue NODE = DAG.getNode( \
4511 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4512 {Chain, Op2, \
4513 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4514 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4515 Results.push_back(NODE.getValue(1)); \
4516 break; \
4517 }
4518 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4519 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4520#undef CRC_CASE_EXT_UNARYOP
4521#define CSR_CASE(ID) \
4522 case Intrinsic::loongarch_##ID: { \
4523 if (!Subtarget.is64Bit()) \
4524 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4525 break; \
4526 }
4527 CSR_CASE(csrrd_d);
4528 CSR_CASE(csrwr_d);
4529 CSR_CASE(csrxchg_d);
4530 CSR_CASE(iocsrrd_d);
4531#undef CSR_CASE
4532 case Intrinsic::loongarch_csrrd_w: {
4533 unsigned Imm = Op2->getAsZExtVal();
4534 if (!isUInt<14>(Imm)) {
4535 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4536 return;
4537 }
4538 SDValue CSRRDResults =
4539 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4540 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4541 Results.push_back(
4542 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4543 Results.push_back(CSRRDResults.getValue(1));
4544 break;
4545 }
4546 case Intrinsic::loongarch_csrwr_w: {
4547 unsigned Imm = N->getConstantOperandVal(3);
4548 if (!isUInt<14>(Imm)) {
4549 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4550 return;
4551 }
4552 SDValue CSRWRResults =
4553 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4554 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4555 DAG.getConstant(Imm, DL, GRLenVT)});
4556 Results.push_back(
4557 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4558 Results.push_back(CSRWRResults.getValue(1));
4559 break;
4560 }
4561 case Intrinsic::loongarch_csrxchg_w: {
4562 unsigned Imm = N->getConstantOperandVal(4);
4563 if (!isUInt<14>(Imm)) {
4564 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4565 return;
4566 }
4567 SDValue CSRXCHGResults = DAG.getNode(
4568 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4569 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4570 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4571 DAG.getConstant(Imm, DL, GRLenVT)});
4572 Results.push_back(
4573 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4574 Results.push_back(CSRXCHGResults.getValue(1));
4575 break;
4576 }
4577#define IOCSRRD_CASE(NAME, NODE) \
4578 case Intrinsic::loongarch_##NAME: { \
4579 SDValue IOCSRRDResults = \
4580 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4581 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4582 Results.push_back( \
4583 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4584 Results.push_back(IOCSRRDResults.getValue(1)); \
4585 break; \
4586 }
4587 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4588 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4589 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4590#undef IOCSRRD_CASE
4591 case Intrinsic::loongarch_cpucfg: {
4592 SDValue CPUCFGResults =
4593 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4594 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4595 Results.push_back(
4596 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4597 Results.push_back(CPUCFGResults.getValue(1));
4598 break;
4599 }
4600 case Intrinsic::loongarch_lddir_d: {
4601 if (!Subtarget.is64Bit()) {
4602 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4603 return;
4604 }
4605 break;
4606 }
4607 }
4608 break;
4609 }
4610 case ISD::READ_REGISTER: {
4611 if (Subtarget.is64Bit())
4612 DAG.getContext()->emitError(
4613 "On LA64, only 64-bit registers can be read.");
4614 else
4615 DAG.getContext()->emitError(
4616 "On LA32, only 32-bit registers can be read.");
4617 Results.push_back(DAG.getUNDEF(VT));
4618 Results.push_back(N->getOperand(0));
4619 break;
4620 }
4621 case ISD::INTRINSIC_WO_CHAIN: {
4622 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4623 break;
4624 }
4625 case ISD::LROUND: {
4626 SDValue Op0 = N->getOperand(0);
4627 EVT OpVT = Op0.getValueType();
4628 RTLIB::Libcall LC =
4629 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4630 MakeLibCallOptions CallOptions;
4631 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4632 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4633 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4634 Results.push_back(Result);
4635 break;
4636 }
4637 case ISD::ATOMIC_CMP_SWAP: {
4638 replaceCMP_SWAP_128Results(N, Results, DAG);
4639 break;
4640 }
4641 case ISD::TRUNCATE: {
4642 MVT VT = N->getSimpleValueType(0);
4643 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4644 return;
4645
4646 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4647 SDValue In = N->getOperand(0);
4648 EVT InVT = In.getValueType();
4649 EVT InEltVT = InVT.getVectorElementType();
4650 EVT EltVT = VT.getVectorElementType();
4651 unsigned MinElts = VT.getVectorNumElements();
4652 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4653 unsigned InBits = InVT.getSizeInBits();
4654
4655 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4656 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4657 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4658 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4659 for (unsigned I = 0; I < MinElts; ++I)
4660 TruncMask[I] = Scale * I;
4661
4662 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4663 MVT SVT = In.getSimpleValueType().getScalarType();
4664 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4665 SDValue WidenIn =
4666 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4667 DAG.getVectorIdxConstant(0, DL));
4668 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4669 "Illegal vector type in truncation");
4670 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4671 Results.push_back(
4672 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4673 return;
4674 }
4675 }
4676
4677 break;
4678 }
4679 }
4680}
4681
4682 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4683 TargetLowering::DAGCombinerInfo &DCI,
4684 const LoongArchSubtarget &Subtarget) {
4685 if (DCI.isBeforeLegalizeOps())
4686 return SDValue();
4687
4688 SDValue FirstOperand = N->getOperand(0);
4689 SDValue SecondOperand = N->getOperand(1);
4690 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4691 EVT ValTy = N->getValueType(0);
4692 SDLoc DL(N);
4693 uint64_t lsb, msb;
4694 unsigned SMIdx, SMLen;
4695 ConstantSDNode *CN;
4696 SDValue NewOperand;
4697 MVT GRLenVT = Subtarget.getGRLenVT();
4698
4699 // BSTRPICK requires the 32S feature.
4700 if (!Subtarget.has32S())
4701 return SDValue();
4702
4703 // Op's second operand must be a shifted mask.
4704 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4705 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4706 return SDValue();
4707
4708 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4709 // Pattern match BSTRPICK.
4710 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4711 // => BSTRPICK $dst, $src, msb, lsb
4712 // where msb = lsb + len - 1
4713
4714 // The second operand of the shift must be an immediate.
4715 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4716 return SDValue();
4717
4718 lsb = CN->getZExtValue();
4719
4720 // Return if the shifted mask does not start at bit 0 or the sum of its
4721 // length and lsb exceeds the word's size.
4722 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4723 return SDValue();
4724
4725 NewOperand = FirstOperand.getOperand(0);
4726 } else {
4727 // Pattern match BSTRPICK.
4728 // $dst = and $src, (2**len - 1), if len > 12
4729 // => BSTRPICK $dst, $src, msb, lsb
4730 // where lsb = 0 and msb = len - 1
4731
4732 // If the mask is <= 0xfff, andi can be used instead.
4733 if (CN->getZExtValue() <= 0xfff)
4734 return SDValue();
4735
4736 // Return if the MSB of the mask exceeds the word's size.
4737 if (SMIdx + SMLen > ValTy.getSizeInBits())
4738 return SDValue();
4739
4740 if (SMIdx > 0) {
4741 // Omit if the constant has more than 2 uses. This is a conservative
4742 // decision. Whether it is a win depends on the HW microarchitecture.
4743 // However it should always be better for 1 and 2 uses.
4744 if (CN->use_size() > 2)
4745 return SDValue();
4746 // Return if the constant can be composed by a single LU12I.W.
4747 if ((CN->getZExtValue() & 0xfff) == 0)
4748 return SDValue();
4749 // Return if the constant can be composed by a single ADDI with
4750 // the zero register.
4751 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4752 return SDValue();
4753 }
4754
4755 lsb = SMIdx;
4756 NewOperand = FirstOperand;
4757 }
4758
4759 msb = lsb + SMLen - 1;
4760 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4761 DAG.getConstant(msb, DL, GRLenVT),
4762 DAG.getConstant(lsb, DL, GRLenVT));
4763 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4764 return NR0;
4765 // Try to optimize to
4766 // bstrpick $Rd, $Rs, msb, lsb
4767 // slli $Rd, $Rd, lsb
4768 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4769 DAG.getConstant(lsb, DL, GRLenVT));
4770}
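// ---- Illustrative sketch (editorial addition, not part of this file) ----
// Both patterns above fold a shift/mask pair into one bit-field extract.
// BSTRPICK rd, rj, msb, lsb copies bits [msb:lsb] of rj into rd, zero
// extended, i.e. the scalar helper below (made-up name):
#include <cstdint>
static uint64_t bstrpick(uint64_t Src, unsigned Msb, unsigned Lsb) {
  const unsigned Len = Msb - Lsb + 1;
  const uint64_t Mask = Len < 64 ? (uint64_t(1) << Len) - 1 : ~uint64_t(0);
  return (Src >> Lsb) & Mask;
}
// So `and (srl x, lsb), (2**len - 1)` becomes bstrpick(x, lsb + len - 1, lsb),
// and a bare `and x, shifted-mask` becomes a bstrpick followed by slli.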
4771
4772 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4773 TargetLowering::DAGCombinerInfo &DCI,
4774 const LoongArchSubtarget &Subtarget) {
4775 // BSTRPICK requires the 32S feature.
4776 if (!Subtarget.has32S())
4777 return SDValue();
4778
4779 if (DCI.isBeforeLegalizeOps())
4780 return SDValue();
4781
4782 // $dst = srl (and $src, Mask), Shamt
4783 // =>
4784 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4785 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4786 //
4787
4788 SDValue FirstOperand = N->getOperand(0);
4789 ConstantSDNode *CN;
4790 EVT ValTy = N->getValueType(0);
4791 SDLoc DL(N);
4792 MVT GRLenVT = Subtarget.getGRLenVT();
4793 unsigned MaskIdx, MaskLen;
4794 uint64_t Shamt;
4795
4796 // The first operand must be an AND and the second operand of the AND must be
4797 // a shifted mask.
4798 if (FirstOperand.getOpcode() != ISD::AND ||
4799 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4800 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4801 return SDValue();
4802
4803 // The second operand (shift amount) must be an immediate.
4804 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4805 return SDValue();
4806
4807 Shamt = CN->getZExtValue();
4808 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4809 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4810 FirstOperand->getOperand(0),
4811 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4812 DAG.getConstant(Shamt, DL, GRLenVT));
4813
4814 return SDValue();
4815}
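// ---- Illustrative sketch (editorial addition, not part of this file) ----
// The combine above depends on recognising a "shifted mask": a single
// contiguous run of ones. A standalone check equivalent to isShiftedMask_64,
// using GCC/Clang builtins (made-up name):
#include <cstdint>
static bool isShiftedMask64(uint64_t V, unsigned &Idx, unsigned &Len) {
  if (V == 0)
    return false;
  Idx = __builtin_ctzll(V);      // index of the lowest set bit
  Len = __builtin_popcountll(V); // number of set bits
  // Contiguous iff the bits above Idx form exactly Len consecutive ones.
  return (V >> Idx) == (Len < 64 ? (uint64_t(1) << Len) - 1 : ~uint64_t(0));
}
// With that, (x & Mask) >> Shamt equals bstrpick(x, Idx + Len - 1, Shamt)
// whenever Idx <= Shamt <= Idx + Len - 1, which is exactly what is checked.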
4816
4817// Helper to peek through bitops/trunc/setcc to determine size of source vector.
4818// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
4819static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4820 unsigned Depth) {
4821 // Limit recursion.
4822 if (Depth >= SelectionDAG::MaxRecursionDepth)
4823 return false;
4824 switch (Src.getOpcode()) {
4825 case ISD::SETCC:
4826 case ISD::TRUNCATE:
4827 return Src.getOperand(0).getValueSizeInBits() == Size;
4828 case ISD::FREEZE:
4829 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4830 case ISD::AND:
4831 case ISD::XOR:
4832 case ISD::OR:
4833 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4834 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4835 case ISD::SELECT:
4836 case ISD::VSELECT:
4837 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4838 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4839 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4840 case ISD::BUILD_VECTOR:
4841 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4842 ISD::isBuildVectorAllOnes(Src.getNode());
4843 }
4844 return false;
4845}
4846
4847 // Helper to push sign extension of vXi1 SETCC result through bitops.
4848 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4849 SDValue Src, const SDLoc &DL) {
4850 switch (Src.getOpcode()) {
4851 case ISD::SETCC:
4852 case ISD::FREEZE:
4853 case ISD::TRUNCATE:
4854 case ISD::BUILD_VECTOR:
4855 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4856 case ISD::AND:
4857 case ISD::XOR:
4858 case ISD::OR:
4859 return DAG.getNode(
4860 Src.getOpcode(), DL, SExtVT,
4861 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4862 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4863 case ISD::SELECT:
4864 case ISD::VSELECT:
4865 return DAG.getSelect(
4866 DL, SExtVT, Src.getOperand(0),
4867 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4868 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4869 }
4870 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4871}
4872
4873 static SDValue
4874 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4875 TargetLowering::DAGCombinerInfo &DCI,
4876 const LoongArchSubtarget &Subtarget) {
4877 SDLoc DL(N);
4878 EVT VT = N->getValueType(0);
4879 SDValue Src = N->getOperand(0);
4880 EVT SrcVT = Src.getValueType();
4881
4882 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4883 return SDValue();
4884
4885 bool UseLASX;
4886 unsigned Opc = ISD::DELETED_NODE;
4887 EVT CmpVT = Src.getOperand(0).getValueType();
4888 EVT EltVT = CmpVT.getVectorElementType();
4889
4890 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4891 UseLASX = false;
4892 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4893 CmpVT.getSizeInBits() == 256)
4894 UseLASX = true;
4895 else
4896 return SDValue();
4897
4898 SDValue SrcN1 = Src.getOperand(1);
4899 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
4900 default:
4901 break;
4902 case ISD::SETEQ:
4903 // x == 0 => not (vmsknez.b x)
4904 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4906 break;
4907 case ISD::SETGT:
4908 // x > -1 => vmskgez.b x
4909 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
4911 break;
4912 case ISD::SETGE:
4913 // x >= 0 => vmskgez.b x
4914 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4916 break;
4917 case ISD::SETLT:
4918 // x < 0 => vmskltz.{b,h,w,d} x
4919 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
4920 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4921 EltVT == MVT::i64))
4923 break;
4924 case ISD::SETLE:
4925 // x <= -1 => vmskltz.{b,h,w,d} x
4926 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
4927 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4928 EltVT == MVT::i64))
4930 break;
4931 case ISD::SETNE:
4932 // x != 0 => vmsknez.b x
4933 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4935 break;
4936 }
4937
4938 if (Opc == ISD::DELETED_NODE)
4939 return SDValue();
4940
4941 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
4943 V = DAG.getZExtOrTrunc(V, DL, T);
4944 return DAG.getBitcast(VT, V);
4945}
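// ---- Illustrative sketch (editorial addition, not part of this file) ----
// The combine above turns "bitcast (setcc v16i8 X, 0, setlt) to i16" into a
// single [X]VMSKLTZ: gather the sign bit of every lane into a scalar mask,
// as in the scalar model below (made-up name):
#include <cstdint>
static uint16_t vmskltz_b_model(const int8_t Lane[16]) {
  uint16_t Mask = 0;
  for (int I = 0; I < 16; ++I)
    if (Lane[I] < 0) // lane compares less-than-zero <=> its sign bit is set
      Mask |= uint16_t(1) << I;
  return Mask;
}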
4946
4947 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
4948 TargetLowering::DAGCombinerInfo &DCI,
4949 const LoongArchSubtarget &Subtarget) {
4950 SDLoc DL(N);
4951 EVT VT = N->getValueType(0);
4952 SDValue Src = N->getOperand(0);
4953 EVT SrcVT = Src.getValueType();
4954
4955 if (!DCI.isBeforeLegalizeOps())
4956 return SDValue();
4957
4958 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
4959 return SDValue();
4960
4961 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
4962 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
4963 if (Res)
4964 return Res;
4965
4966 // Generate vXi1 using [X]VMSKLTZ
4967 MVT SExtVT;
4968 unsigned Opc;
4969 bool UseLASX = false;
4970 bool PropagateSExt = false;
4971
4972 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
4973 EVT CmpVT = Src.getOperand(0).getValueType();
4974 if (CmpVT.getSizeInBits() > 256)
4975 return SDValue();
4976 }
4977
4978 switch (SrcVT.getSimpleVT().SimpleTy) {
4979 default:
4980 return SDValue();
4981 case MVT::v2i1:
4982 SExtVT = MVT::v2i64;
4983 break;
4984 case MVT::v4i1:
4985 SExtVT = MVT::v4i32;
4986 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4987 SExtVT = MVT::v4i64;
4988 UseLASX = true;
4989 PropagateSExt = true;
4990 }
4991 break;
4992 case MVT::v8i1:
4993 SExtVT = MVT::v8i16;
4994 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4995 SExtVT = MVT::v8i32;
4996 UseLASX = true;
4997 PropagateSExt = true;
4998 }
4999 break;
5000 case MVT::v16i1:
5001 SExtVT = MVT::v16i8;
5002 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5003 SExtVT = MVT::v16i16;
5004 UseLASX = true;
5005 PropagateSExt = true;
5006 }
5007 break;
5008 case MVT::v32i1:
5009 SExtVT = MVT::v32i8;
5010 UseLASX = true;
5011 break;
5012 };
5013 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5014 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5015
5016 SDValue V;
5017 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5018 if (Src.getSimpleValueType() == MVT::v32i8) {
5019 SDValue Lo, Hi;
5020 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5021 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
5022 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
5023 Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
5024 DAG.getConstant(16, DL, MVT::i8));
5025 V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
5026 } else if (UseLASX) {
5027 return SDValue();
5028 }
5029 }
5030
5031 if (!V) {
5032 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5033 V = DAG.getNode(Opc, DL, MVT::i64, Src);
5034 }
5035
5036 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5037 V = DAG.getZExtOrTrunc(V, DL, T);
5038 return DAG.getBitcast(VT, V);
5039}
5040
5041static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5042 TargetLowering::DAGCombinerInfo &DCI,
5043 const LoongArchSubtarget &Subtarget) {
5044 MVT GRLenVT = Subtarget.getGRLenVT();
5045 EVT ValTy = N->getValueType(0);
5046 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5047 ConstantSDNode *CN0, *CN1;
5048 SDLoc DL(N);
5049 unsigned ValBits = ValTy.getSizeInBits();
5050 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5051 unsigned Shamt;
5052 bool SwapAndRetried = false;
5053
5054 // BSTRPICK requires the 32S feature.
5055 if (!Subtarget.has32S())
5056 return SDValue();
5057
5058 if (DCI.isBeforeLegalizeOps())
5059 return SDValue();
5060
5061 if (ValBits != 32 && ValBits != 64)
5062 return SDValue();
5063
5064Retry:
5065 // 1st pattern to match BSTRINS:
5066 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5067 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5068 // =>
5069 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5070 if (N0.getOpcode() == ISD::AND &&
5071 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5072 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5073 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5074 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5075 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5076 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5077 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5078 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5079 (MaskIdx0 + MaskLen0 <= ValBits)) {
5080 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5081 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5082 N1.getOperand(0).getOperand(0),
5083 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5084 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5085 }
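  // Illustrative 32-bit example of the 1st pattern: with mask1 = 0x0000ff00
  // (size = 8, lsb = 8) and mask0 = 0xffff00ff,
  //   or (and X, 0xffff00ff), (and (shl Y, 8), 0x0000ff00)
  // becomes BSTRINS X, Y, 15, 8.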
5086
5087 // 2nd pattern to match BSTRINS:
5088 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5089 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5090 // =>
5091 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5092 if (N0.getOpcode() == ISD::AND &&
5093 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5094 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5095 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5096 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5097 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5098 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5099 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5100 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5101 (MaskIdx0 + MaskLen0 <= ValBits)) {
5102 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5103 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5104 N1.getOperand(0).getOperand(0),
5105 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5106 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5107 }
5108
5109 // 3rd pattern to match BSTRINS:
5110 // R = or (and X, mask0), (and Y, mask1)
5111 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5112 // =>
5113 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5114 // where msb = lsb + size - 1
5115 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5116 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5117 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5118 (MaskIdx0 + MaskLen0 <= 64) &&
5119 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5120 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5121 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5122 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5123 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5124 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5125 DAG.getConstant(ValBits == 32
5126 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5127 : (MaskIdx0 + MaskLen0 - 1),
5128 DL, GRLenVT),
5129 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5130 }
5131
5132 // 4th pattern to match BSTRINS:
5133 // R = or (and X, mask), (shl Y, shamt)
5134 // where mask = (2**shamt - 1)
5135 // =>
5136 // R = BSTRINS X, Y, ValBits - 1, shamt
5137 // where ValBits = 32 or 64
5138 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5139 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5140 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5141 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5142 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5143 (MaskIdx0 + MaskLen0 <= ValBits)) {
5144 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5145 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5146 N1.getOperand(0),
5147 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5148 DAG.getConstant(Shamt, DL, GRLenVT));
5149 }
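  // Illustrative 32-bit example of the 4th pattern: with mask = 0xff and
  // shamt = 8, or (and X, 0xff), (shl Y, 8) becomes BSTRINS X, Y, 31, 8,
  // i.e. Y overwrites everything above the low 8 bits of X.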
5150
5151 // 5th pattern to match BSTRINS:
5152 // R = or (and X, mask), const
5153 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5154 // =>
5155 // R = BSTRINS X, (const >> lsb), msb, lsb
5156 // where msb = lsb + size - 1
5157 if (N0.getOpcode() == ISD::AND &&
5158 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5159 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5160 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5161 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5162 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5163 return DAG.getNode(
5164 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5165 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5166 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5167 : (MaskIdx0 + MaskLen0 - 1),
5168 DL, GRLenVT),
5169 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5170 }
5171
5172 // 6th pattern.
5173 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5174 // by the incoming bits are known to be zero.
5175 // =>
5176 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5177 //
5178 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5179 // pattern is more common than the 1st. So we put the 1st before the 6th in
5180 // order to match as many nodes as possible.
5181 ConstantSDNode *CNMask, *CNShamt;
5182 unsigned MaskIdx, MaskLen;
5183 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5184 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5185 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5186 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5187 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5188 Shamt = CNShamt->getZExtValue();
5189 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5190 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5191 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5192 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5193 N1.getOperand(0).getOperand(0),
5194 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5195 DAG.getConstant(Shamt, DL, GRLenVT));
5196 }
5197 }
5198
5199 // 7th pattern.
5200 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5201 // overwritten by the incoming bits are known to be zero.
5202 // =>
5203 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5204 //
5205 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5206 // before the 7th in order to match as many nodes as possible.
5207 if (N1.getOpcode() == ISD::AND &&
5208 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5209 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5210 N1.getOperand(0).getOpcode() == ISD::SHL &&
5211 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5212 CNShamt->getZExtValue() == MaskIdx) {
5213 APInt ShMask(ValBits, CNMask->getZExtValue());
5214 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5215 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5216 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5217 N1.getOperand(0).getOperand(0),
5218 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5219 DAG.getConstant(MaskIdx, DL, GRLenVT));
5220 }
5221 }
5222
5223 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5224 if (!SwapAndRetried) {
5225 std::swap(N0, N1);
5226 SwapAndRetried = true;
5227 goto Retry;
5228 }
5229
5230 SwapAndRetried = false;
5231Retry2:
5232 // 8th pattern.
5233 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5234 // the incoming bits are known to be zero.
5235 // =>
5236 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5237 //
5238 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5239 // we put it here in order to match as many nodes as possible or generate less
5240 // instructions.
5241 if (N1.getOpcode() == ISD::AND &&
5242 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5243 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5244 APInt ShMask(ValBits, CNMask->getZExtValue());
5245 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5246 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5247 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5248 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5249 N1->getOperand(0),
5250 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5251 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5252 DAG.getConstant(MaskIdx, DL, GRLenVT));
5253 }
5254 }
5255 // Swap N0/N1 and retry.
5256 if (!SwapAndRetried) {
5257 std::swap(N0, N1);
5258 SwapAndRetried = true;
5259 goto Retry2;
5260 }
5261
5262 return SDValue();
5263}
5264
5265static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5266 ExtType = ISD::NON_EXTLOAD;
5267
5268 switch (V.getNode()->getOpcode()) {
5269 case ISD::LOAD: {
5270 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5271 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5272 (LoadNode->getMemoryVT() == MVT::i16)) {
5273 ExtType = LoadNode->getExtensionType();
5274 return true;
5275 }
5276 return false;
5277 }
5278 case ISD::AssertSext: {
5279 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5280 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5281 ExtType = ISD::SEXTLOAD;
5282 return true;
5283 }
5284 return false;
5285 }
5286 case ISD::AssertZext: {
5287 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5288 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5289 ExtType = ISD::ZEXTLOAD;
5290 return true;
5291 }
5292 return false;
5293 }
5294 default:
5295 return false;
5296 }
5297
5298 return false;
5299}
5300
5301// Eliminate redundant truncation and zero-extension nodes.
5302// * Case 1:
5303// +------------+ +------------+ +------------+
5304// | Input1 | | Input2 | | CC |
5305// +------------+ +------------+ +------------+
5306// | | |
5307// V V +----+
5308// +------------+ +------------+ |
5309// | TRUNCATE | | TRUNCATE | |
5310// +------------+ +------------+ |
5311// | | |
5312// V V |
5313// +------------+ +------------+ |
5314// | ZERO_EXT | | ZERO_EXT | |
5315// +------------+ +------------+ |
5316// | | |
5317// | +-------------+ |
5318// V V | |
5319// +----------------+ | |
5320// | AND | | |
5321// +----------------+ | |
5322// | | |
5323// +---------------+ | |
5324// | | |
5325// V V V
5326// +-------------+
5327// | CMP |
5328// +-------------+
5329// * Case 2:
5330// +------------+ +------------+ +-------------+ +------------+ +------------+
5331// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5332// +------------+ +------------+ +-------------+ +------------+ +------------+
5333// | | | | |
5334// V | | | |
5335// +------------+ | | | |
5336// | XOR |<---------------------+ | |
5337// +------------+ | | |
5338// | | | |
5339// V V +---------------+ |
5340// +------------+ +------------+ | |
5341// | TRUNCATE | | TRUNCATE | | +-------------------------+
5342// +------------+ +------------+ | |
5343// | | | |
5344// V V | |
5345// +------------+ +------------+ | |
5346// | ZERO_EXT | | ZERO_EXT | | |
5347// +------------+ +------------+ | |
5348// | | | |
5349// V V | |
5350// +----------------+ | |
5351// | AND | | |
5352// +----------------+ | |
5353// | | |
5354// +---------------+ | |
5355// | | |
5356// V V V
5357// +-------------+
5358// | CMP |
5359// +-------------+
5360static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5361 TargetLowering::DAGCombinerInfo &DCI,
5362 const LoongArchSubtarget &Subtarget) {
5363 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5364
5365 SDNode *AndNode = N->getOperand(0).getNode();
5366 if (AndNode->getOpcode() != ISD::AND)
5367 return SDValue();
5368
5369 SDValue AndInputValue2 = AndNode->getOperand(1);
5370 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5371 return SDValue();
5372
5373 SDValue CmpInputValue = N->getOperand(1);
5374 SDValue AndInputValue1 = AndNode->getOperand(0);
5375 if (AndInputValue1.getOpcode() == ISD::XOR) {
5376 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5377 return SDValue();
5378 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5379 if (!CN || CN->getSExtValue() != -1)
5380 return SDValue();
5381 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5382 if (!CN || CN->getSExtValue() != 0)
5383 return SDValue();
5384 AndInputValue1 = AndInputValue1.getOperand(0);
5385 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5386 return SDValue();
5387 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5388 if (AndInputValue2 != CmpInputValue)
5389 return SDValue();
5390 } else {
5391 return SDValue();
5392 }
5393
5394 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5395 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5396 return SDValue();
5397
5398 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5399 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5400 return SDValue();
5401
5402 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5403 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5404 ISD::LoadExtType ExtType1;
5405 ISD::LoadExtType ExtType2;
5406
5407 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5408 !checkValueWidth(TruncInputValue2, ExtType2))
5409 return SDValue();
5410
5411 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5412 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5413 return SDValue();
5414
5415 if ((ExtType2 != ISD::ZEXTLOAD) &&
5416 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5417 return SDValue();
5418
5419 // These truncation and zero-extension nodes are not necessary, remove them.
5420 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5421 TruncInputValue1, TruncInputValue2);
5422 SDValue NewSetCC =
5423 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5424 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5425 return SDValue(N, 0);
5426}
5427
5428// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5429static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5430 TargetLowering::DAGCombinerInfo &DCI,
5431 const LoongArchSubtarget &Subtarget) {
5432 if (DCI.isBeforeLegalizeOps())
5433 return SDValue();
5434
5435 SDValue Src = N->getOperand(0);
5436 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5437 return SDValue();
5438
5439 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5440 Src.getOperand(0));
5441}
5442
5443// Perform common combines for BR_CC and SELECT_CC conditions.
5444static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5445 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5446 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5447
5448 // Since an arithmetic right shift always preserves the sign bit, the
5449 // shift can be omitted.
5450 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5451 // setge (sra X, N), 0 -> setge X, 0
5452 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5453 LHS.getOpcode() == ISD::SRA) {
5454 LHS = LHS.getOperand(0);
5455 return true;
5456 }
5457
5458 if (!ISD::isIntEqualitySetCC(CCVal))
5459 return false;
5460
5461 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5462 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5463 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5464 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5465 // If we're looking for eq 0 instead of ne 0, we need to invert the
5466 // condition.
5467 bool Invert = CCVal == ISD::SETEQ;
5468 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5469 if (Invert)
5470 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5471
5472 RHS = LHS.getOperand(1);
5473 LHS = LHS.getOperand(0);
5474 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5475
5476 CC = DAG.getCondCode(CCVal);
5477 return true;
5478 }
5479
5480 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
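  // For example, with C = 3 and GRLen = 64:
  //   ((srl (and X, 8), 3), 0, ne) becomes ((shl X, 60), 0, lt).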
5481 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5482 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5483 SDValue LHS0 = LHS.getOperand(0);
5484 if (LHS0.getOpcode() == ISD::AND &&
5485 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5486 uint64_t Mask = LHS0.getConstantOperandVal(1);
5487 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5488 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5489 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5490 CC = DAG.getCondCode(CCVal);
5491
5492 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5493 LHS = LHS0.getOperand(0);
5494 if (ShAmt != 0)
5495 LHS =
5496 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5497 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5498 return true;
5499 }
5500 }
5501 }
5502
5503 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5504 // This can occur when legalizing some floating point comparisons.
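  // For example, if LHS is known to be 0 or 1, (LHS, 1, setne) is rewritten
  // as (LHS, 0, seteq).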
5505 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5506 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5507 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5508 CC = DAG.getCondCode(CCVal);
5509 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5510 return true;
5511 }
5512
5513 return false;
5514}
5515
5516static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5517 TargetLowering::DAGCombinerInfo &DCI,
5518 const LoongArchSubtarget &Subtarget) {
5519 SDValue LHS = N->getOperand(1);
5520 SDValue RHS = N->getOperand(2);
5521 SDValue CC = N->getOperand(3);
5522 SDLoc DL(N);
5523
5524 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5525 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5526 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5527
5528 return SDValue();
5529}
5530
5531static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5532 TargetLowering::DAGCombinerInfo &DCI,
5533 const LoongArchSubtarget &Subtarget) {
5534 // Transform
5535 SDValue LHS = N->getOperand(0);
5536 SDValue RHS = N->getOperand(1);
5537 SDValue CC = N->getOperand(2);
5538 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5539 SDValue TrueV = N->getOperand(3);
5540 SDValue FalseV = N->getOperand(4);
5541 SDLoc DL(N);
5542 EVT VT = N->getValueType(0);
5543
5544 // If the True and False values are the same, we don't need a select_cc.
5545 if (TrueV == FalseV)
5546 return TrueV;
5547
5548 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5549 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
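  // For example, (select (x < 0), 5, 2) becomes ((x >> (GRLEN - 1)) & 3) + 2:
  // the shift yields all-ones when x is negative, selecting 5, and zero
  // otherwise, selecting 2.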
5550 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5551 isNullConstant(RHS) &&
5552 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5553 if (CCVal == ISD::CondCode::SETGE)
5554 std::swap(TrueV, FalseV);
5555
5556 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5557 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5558 // Only handle simm12; if the constant is outside this range, it can be
5559 // materialized in a register instead.
5560 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5561 isInt<12>(TrueSImm - FalseSImm)) {
5562 SDValue SRA =
5563 DAG.getNode(ISD::SRA, DL, VT, LHS,
5564 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5565 SDValue AND =
5566 DAG.getNode(ISD::AND, DL, VT, SRA,
5567 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5568 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5569 }
5570
5571 if (CCVal == ISD::CondCode::SETGE)
5572 std::swap(TrueV, FalseV);
5573 }
5574
5575 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5576 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5577 {LHS, RHS, CC, TrueV, FalseV});
5578
5579 return SDValue();
5580}
5581
5582template <unsigned N>
5583static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5584 SelectionDAG &DAG,
5585 const LoongArchSubtarget &Subtarget,
5586 bool IsSigned = false) {
5587 SDLoc DL(Node);
5588 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5589 // Check the ImmArg.
5590 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5591 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5592 DAG.getContext()->emitError(Node->getOperationName(0) +
5593 ": argument out of range.");
5594 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5595 }
5596 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5597}
5598
5599template <unsigned N>
5600static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5601 SelectionDAG &DAG, bool IsSigned = false) {
5602 SDLoc DL(Node);
5603 EVT ResTy = Node->getValueType(0);
5604 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5605
5606 // Check the ImmArg.
5607 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5608 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5609 DAG.getContext()->emitError(Node->getOperationName(0) +
5610 ": argument out of range.");
5611 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5612 }
5613 return DAG.getConstant(
5614 APInt(ResTy.getScalarType().getSizeInBits(),
5615 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5616 DL, ResTy);
5617}
5618
5619static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5620 SDLoc DL(Node);
5621 EVT ResTy = Node->getValueType(0);
5622 SDValue Vec = Node->getOperand(2);
5623 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5624 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5625}
5626
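// Lower the [x]vbitclr.{b,h,w,d} intrinsics: each element computes
// x & ~(1 << (y mod EltBits)); truncateVecElts masks the bit index so the
// shift amount stays within the element width.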
5627static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5628 SDLoc DL(Node);
5629 EVT ResTy = Node->getValueType(0);
5630 SDValue One = DAG.getConstant(1, DL, ResTy);
5631 SDValue Bit =
5632 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5633
5634 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5635 DAG.getNOT(DL, Bit, ResTy));
5636}
5637
5638template <unsigned N>
5639static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5640 SDLoc DL(Node);
5641 EVT ResTy = Node->getValueType(0);
5642 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5643 // Check the unsigned ImmArg.
5644 if (!isUInt<N>(CImm->getZExtValue())) {
5645 DAG.getContext()->emitError(Node->getOperationName(0) +
5646 ": argument out of range.");
5647 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5648 }
5649
5650 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5651 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5652
5653 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5654}
5655
5656template <unsigned N>
5657static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5658 SDLoc DL(Node);
5659 EVT ResTy = Node->getValueType(0);
5660 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5661 // Check the unsigned ImmArg.
5662 if (!isUInt<N>(CImm->getZExtValue())) {
5663 DAG.getContext()->emitError(Node->getOperationName(0) +
5664 ": argument out of range.");
5665 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5666 }
5667
5668 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5669 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5670 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5671}
5672
5673template <unsigned N>
5674static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5675 SDLoc DL(Node);
5676 EVT ResTy = Node->getValueType(0);
5677 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5678 // Check the unsigned ImmArg.
5679 if (!isUInt<N>(CImm->getZExtValue())) {
5680 DAG.getContext()->emitError(Node->getOperationName(0) +
5681 ": argument out of range.");
5682 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5683 }
5684
5685 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5686 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5687 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5688}
5689
5690static SDValue
5691performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5692 TargetLowering::DAGCombinerInfo &DCI,
5693 const LoongArchSubtarget &Subtarget) {
5694 SDLoc DL(N);
5695 switch (N->getConstantOperandVal(0)) {
5696 default:
5697 break;
5698 case Intrinsic::loongarch_lsx_vadd_b:
5699 case Intrinsic::loongarch_lsx_vadd_h:
5700 case Intrinsic::loongarch_lsx_vadd_w:
5701 case Intrinsic::loongarch_lsx_vadd_d:
5702 case Intrinsic::loongarch_lasx_xvadd_b:
5703 case Intrinsic::loongarch_lasx_xvadd_h:
5704 case Intrinsic::loongarch_lasx_xvadd_w:
5705 case Intrinsic::loongarch_lasx_xvadd_d:
5706 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5707 N->getOperand(2));
5708 case Intrinsic::loongarch_lsx_vaddi_bu:
5709 case Intrinsic::loongarch_lsx_vaddi_hu:
5710 case Intrinsic::loongarch_lsx_vaddi_wu:
5711 case Intrinsic::loongarch_lsx_vaddi_du:
5712 case Intrinsic::loongarch_lasx_xvaddi_bu:
5713 case Intrinsic::loongarch_lasx_xvaddi_hu:
5714 case Intrinsic::loongarch_lasx_xvaddi_wu:
5715 case Intrinsic::loongarch_lasx_xvaddi_du:
5716 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5717 lowerVectorSplatImm<5>(N, 2, DAG));
5718 case Intrinsic::loongarch_lsx_vsub_b:
5719 case Intrinsic::loongarch_lsx_vsub_h:
5720 case Intrinsic::loongarch_lsx_vsub_w:
5721 case Intrinsic::loongarch_lsx_vsub_d:
5722 case Intrinsic::loongarch_lasx_xvsub_b:
5723 case Intrinsic::loongarch_lasx_xvsub_h:
5724 case Intrinsic::loongarch_lasx_xvsub_w:
5725 case Intrinsic::loongarch_lasx_xvsub_d:
5726 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5727 N->getOperand(2));
5728 case Intrinsic::loongarch_lsx_vsubi_bu:
5729 case Intrinsic::loongarch_lsx_vsubi_hu:
5730 case Intrinsic::loongarch_lsx_vsubi_wu:
5731 case Intrinsic::loongarch_lsx_vsubi_du:
5732 case Intrinsic::loongarch_lasx_xvsubi_bu:
5733 case Intrinsic::loongarch_lasx_xvsubi_hu:
5734 case Intrinsic::loongarch_lasx_xvsubi_wu:
5735 case Intrinsic::loongarch_lasx_xvsubi_du:
5736 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5737 lowerVectorSplatImm<5>(N, 2, DAG));
5738 case Intrinsic::loongarch_lsx_vneg_b:
5739 case Intrinsic::loongarch_lsx_vneg_h:
5740 case Intrinsic::loongarch_lsx_vneg_w:
5741 case Intrinsic::loongarch_lsx_vneg_d:
5742 case Intrinsic::loongarch_lasx_xvneg_b:
5743 case Intrinsic::loongarch_lasx_xvneg_h:
5744 case Intrinsic::loongarch_lasx_xvneg_w:
5745 case Intrinsic::loongarch_lasx_xvneg_d:
5746 return DAG.getNode(
5747 ISD::SUB, DL, N->getValueType(0),
5748 DAG.getConstant(
5749 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5750 /*isSigned=*/true),
5751 SDLoc(N), N->getValueType(0)),
5752 N->getOperand(1));
5753 case Intrinsic::loongarch_lsx_vmax_b:
5754 case Intrinsic::loongarch_lsx_vmax_h:
5755 case Intrinsic::loongarch_lsx_vmax_w:
5756 case Intrinsic::loongarch_lsx_vmax_d:
5757 case Intrinsic::loongarch_lasx_xvmax_b:
5758 case Intrinsic::loongarch_lasx_xvmax_h:
5759 case Intrinsic::loongarch_lasx_xvmax_w:
5760 case Intrinsic::loongarch_lasx_xvmax_d:
5761 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5762 N->getOperand(2));
5763 case Intrinsic::loongarch_lsx_vmax_bu:
5764 case Intrinsic::loongarch_lsx_vmax_hu:
5765 case Intrinsic::loongarch_lsx_vmax_wu:
5766 case Intrinsic::loongarch_lsx_vmax_du:
5767 case Intrinsic::loongarch_lasx_xvmax_bu:
5768 case Intrinsic::loongarch_lasx_xvmax_hu:
5769 case Intrinsic::loongarch_lasx_xvmax_wu:
5770 case Intrinsic::loongarch_lasx_xvmax_du:
5771 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5772 N->getOperand(2));
5773 case Intrinsic::loongarch_lsx_vmaxi_b:
5774 case Intrinsic::loongarch_lsx_vmaxi_h:
5775 case Intrinsic::loongarch_lsx_vmaxi_w:
5776 case Intrinsic::loongarch_lsx_vmaxi_d:
5777 case Intrinsic::loongarch_lasx_xvmaxi_b:
5778 case Intrinsic::loongarch_lasx_xvmaxi_h:
5779 case Intrinsic::loongarch_lasx_xvmaxi_w:
5780 case Intrinsic::loongarch_lasx_xvmaxi_d:
5781 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5782 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5783 case Intrinsic::loongarch_lsx_vmaxi_bu:
5784 case Intrinsic::loongarch_lsx_vmaxi_hu:
5785 case Intrinsic::loongarch_lsx_vmaxi_wu:
5786 case Intrinsic::loongarch_lsx_vmaxi_du:
5787 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5788 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5789 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5790 case Intrinsic::loongarch_lasx_xvmaxi_du:
5791 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5792 lowerVectorSplatImm<5>(N, 2, DAG));
5793 case Intrinsic::loongarch_lsx_vmin_b:
5794 case Intrinsic::loongarch_lsx_vmin_h:
5795 case Intrinsic::loongarch_lsx_vmin_w:
5796 case Intrinsic::loongarch_lsx_vmin_d:
5797 case Intrinsic::loongarch_lasx_xvmin_b:
5798 case Intrinsic::loongarch_lasx_xvmin_h:
5799 case Intrinsic::loongarch_lasx_xvmin_w:
5800 case Intrinsic::loongarch_lasx_xvmin_d:
5801 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5802 N->getOperand(2));
5803 case Intrinsic::loongarch_lsx_vmin_bu:
5804 case Intrinsic::loongarch_lsx_vmin_hu:
5805 case Intrinsic::loongarch_lsx_vmin_wu:
5806 case Intrinsic::loongarch_lsx_vmin_du:
5807 case Intrinsic::loongarch_lasx_xvmin_bu:
5808 case Intrinsic::loongarch_lasx_xvmin_hu:
5809 case Intrinsic::loongarch_lasx_xvmin_wu:
5810 case Intrinsic::loongarch_lasx_xvmin_du:
5811 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5812 N->getOperand(2));
5813 case Intrinsic::loongarch_lsx_vmini_b:
5814 case Intrinsic::loongarch_lsx_vmini_h:
5815 case Intrinsic::loongarch_lsx_vmini_w:
5816 case Intrinsic::loongarch_lsx_vmini_d:
5817 case Intrinsic::loongarch_lasx_xvmini_b:
5818 case Intrinsic::loongarch_lasx_xvmini_h:
5819 case Intrinsic::loongarch_lasx_xvmini_w:
5820 case Intrinsic::loongarch_lasx_xvmini_d:
5821 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5822 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5823 case Intrinsic::loongarch_lsx_vmini_bu:
5824 case Intrinsic::loongarch_lsx_vmini_hu:
5825 case Intrinsic::loongarch_lsx_vmini_wu:
5826 case Intrinsic::loongarch_lsx_vmini_du:
5827 case Intrinsic::loongarch_lasx_xvmini_bu:
5828 case Intrinsic::loongarch_lasx_xvmini_hu:
5829 case Intrinsic::loongarch_lasx_xvmini_wu:
5830 case Intrinsic::loongarch_lasx_xvmini_du:
5831 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5832 lowerVectorSplatImm<5>(N, 2, DAG));
5833 case Intrinsic::loongarch_lsx_vmul_b:
5834 case Intrinsic::loongarch_lsx_vmul_h:
5835 case Intrinsic::loongarch_lsx_vmul_w:
5836 case Intrinsic::loongarch_lsx_vmul_d:
5837 case Intrinsic::loongarch_lasx_xvmul_b:
5838 case Intrinsic::loongarch_lasx_xvmul_h:
5839 case Intrinsic::loongarch_lasx_xvmul_w:
5840 case Intrinsic::loongarch_lasx_xvmul_d:
5841 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5842 N->getOperand(2));
5843 case Intrinsic::loongarch_lsx_vmadd_b:
5844 case Intrinsic::loongarch_lsx_vmadd_h:
5845 case Intrinsic::loongarch_lsx_vmadd_w:
5846 case Intrinsic::loongarch_lsx_vmadd_d:
5847 case Intrinsic::loongarch_lasx_xvmadd_b:
5848 case Intrinsic::loongarch_lasx_xvmadd_h:
5849 case Intrinsic::loongarch_lasx_xvmadd_w:
5850 case Intrinsic::loongarch_lasx_xvmadd_d: {
5851 EVT ResTy = N->getValueType(0);
5852 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5853 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5854 N->getOperand(3)));
5855 }
5856 case Intrinsic::loongarch_lsx_vmsub_b:
5857 case Intrinsic::loongarch_lsx_vmsub_h:
5858 case Intrinsic::loongarch_lsx_vmsub_w:
5859 case Intrinsic::loongarch_lsx_vmsub_d:
5860 case Intrinsic::loongarch_lasx_xvmsub_b:
5861 case Intrinsic::loongarch_lasx_xvmsub_h:
5862 case Intrinsic::loongarch_lasx_xvmsub_w:
5863 case Intrinsic::loongarch_lasx_xvmsub_d: {
5864 EVT ResTy = N->getValueType(0);
5865 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5866 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5867 N->getOperand(3)));
5868 }
5869 case Intrinsic::loongarch_lsx_vdiv_b:
5870 case Intrinsic::loongarch_lsx_vdiv_h:
5871 case Intrinsic::loongarch_lsx_vdiv_w:
5872 case Intrinsic::loongarch_lsx_vdiv_d:
5873 case Intrinsic::loongarch_lasx_xvdiv_b:
5874 case Intrinsic::loongarch_lasx_xvdiv_h:
5875 case Intrinsic::loongarch_lasx_xvdiv_w:
5876 case Intrinsic::loongarch_lasx_xvdiv_d:
5877 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5878 N->getOperand(2));
5879 case Intrinsic::loongarch_lsx_vdiv_bu:
5880 case Intrinsic::loongarch_lsx_vdiv_hu:
5881 case Intrinsic::loongarch_lsx_vdiv_wu:
5882 case Intrinsic::loongarch_lsx_vdiv_du:
5883 case Intrinsic::loongarch_lasx_xvdiv_bu:
5884 case Intrinsic::loongarch_lasx_xvdiv_hu:
5885 case Intrinsic::loongarch_lasx_xvdiv_wu:
5886 case Intrinsic::loongarch_lasx_xvdiv_du:
5887 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5888 N->getOperand(2));
5889 case Intrinsic::loongarch_lsx_vmod_b:
5890 case Intrinsic::loongarch_lsx_vmod_h:
5891 case Intrinsic::loongarch_lsx_vmod_w:
5892 case Intrinsic::loongarch_lsx_vmod_d:
5893 case Intrinsic::loongarch_lasx_xvmod_b:
5894 case Intrinsic::loongarch_lasx_xvmod_h:
5895 case Intrinsic::loongarch_lasx_xvmod_w:
5896 case Intrinsic::loongarch_lasx_xvmod_d:
5897 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
5898 N->getOperand(2));
5899 case Intrinsic::loongarch_lsx_vmod_bu:
5900 case Intrinsic::loongarch_lsx_vmod_hu:
5901 case Intrinsic::loongarch_lsx_vmod_wu:
5902 case Intrinsic::loongarch_lsx_vmod_du:
5903 case Intrinsic::loongarch_lasx_xvmod_bu:
5904 case Intrinsic::loongarch_lasx_xvmod_hu:
5905 case Intrinsic::loongarch_lasx_xvmod_wu:
5906 case Intrinsic::loongarch_lasx_xvmod_du:
5907 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
5908 N->getOperand(2));
5909 case Intrinsic::loongarch_lsx_vand_v:
5910 case Intrinsic::loongarch_lasx_xvand_v:
5911 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5912 N->getOperand(2));
5913 case Intrinsic::loongarch_lsx_vor_v:
5914 case Intrinsic::loongarch_lasx_xvor_v:
5915 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5916 N->getOperand(2));
5917 case Intrinsic::loongarch_lsx_vxor_v:
5918 case Intrinsic::loongarch_lasx_xvxor_v:
5919 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5920 N->getOperand(2));
5921 case Intrinsic::loongarch_lsx_vnor_v:
5922 case Intrinsic::loongarch_lasx_xvnor_v: {
5923 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5924 N->getOperand(2));
5925 return DAG.getNOT(DL, Res, Res->getValueType(0));
5926 }
5927 case Intrinsic::loongarch_lsx_vandi_b:
5928 case Intrinsic::loongarch_lasx_xvandi_b:
5929 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5930 lowerVectorSplatImm<8>(N, 2, DAG));
5931 case Intrinsic::loongarch_lsx_vori_b:
5932 case Intrinsic::loongarch_lasx_xvori_b:
5933 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5934 lowerVectorSplatImm<8>(N, 2, DAG));
5935 case Intrinsic::loongarch_lsx_vxori_b:
5936 case Intrinsic::loongarch_lasx_xvxori_b:
5937 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5938 lowerVectorSplatImm<8>(N, 2, DAG));
5939 case Intrinsic::loongarch_lsx_vsll_b:
5940 case Intrinsic::loongarch_lsx_vsll_h:
5941 case Intrinsic::loongarch_lsx_vsll_w:
5942 case Intrinsic::loongarch_lsx_vsll_d:
5943 case Intrinsic::loongarch_lasx_xvsll_b:
5944 case Intrinsic::loongarch_lasx_xvsll_h:
5945 case Intrinsic::loongarch_lasx_xvsll_w:
5946 case Intrinsic::loongarch_lasx_xvsll_d:
5947 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5948 truncateVecElts(N, DAG));
5949 case Intrinsic::loongarch_lsx_vslli_b:
5950 case Intrinsic::loongarch_lasx_xvslli_b:
5951 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5952 lowerVectorSplatImm<3>(N, 2, DAG));
5953 case Intrinsic::loongarch_lsx_vslli_h:
5954 case Intrinsic::loongarch_lasx_xvslli_h:
5955 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5956 lowerVectorSplatImm<4>(N, 2, DAG));
5957 case Intrinsic::loongarch_lsx_vslli_w:
5958 case Intrinsic::loongarch_lasx_xvslli_w:
5959 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5960 lowerVectorSplatImm<5>(N, 2, DAG));
5961 case Intrinsic::loongarch_lsx_vslli_d:
5962 case Intrinsic::loongarch_lasx_xvslli_d:
5963 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5964 lowerVectorSplatImm<6>(N, 2, DAG));
5965 case Intrinsic::loongarch_lsx_vsrl_b:
5966 case Intrinsic::loongarch_lsx_vsrl_h:
5967 case Intrinsic::loongarch_lsx_vsrl_w:
5968 case Intrinsic::loongarch_lsx_vsrl_d:
5969 case Intrinsic::loongarch_lasx_xvsrl_b:
5970 case Intrinsic::loongarch_lasx_xvsrl_h:
5971 case Intrinsic::loongarch_lasx_xvsrl_w:
5972 case Intrinsic::loongarch_lasx_xvsrl_d:
5973 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5974 truncateVecElts(N, DAG));
5975 case Intrinsic::loongarch_lsx_vsrli_b:
5976 case Intrinsic::loongarch_lasx_xvsrli_b:
5977 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5978 lowerVectorSplatImm<3>(N, 2, DAG));
5979 case Intrinsic::loongarch_lsx_vsrli_h:
5980 case Intrinsic::loongarch_lasx_xvsrli_h:
5981 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5982 lowerVectorSplatImm<4>(N, 2, DAG));
5983 case Intrinsic::loongarch_lsx_vsrli_w:
5984 case Intrinsic::loongarch_lasx_xvsrli_w:
5985 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5986 lowerVectorSplatImm<5>(N, 2, DAG));
5987 case Intrinsic::loongarch_lsx_vsrli_d:
5988 case Intrinsic::loongarch_lasx_xvsrli_d:
5989 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5990 lowerVectorSplatImm<6>(N, 2, DAG));
5991 case Intrinsic::loongarch_lsx_vsra_b:
5992 case Intrinsic::loongarch_lsx_vsra_h:
5993 case Intrinsic::loongarch_lsx_vsra_w:
5994 case Intrinsic::loongarch_lsx_vsra_d:
5995 case Intrinsic::loongarch_lasx_xvsra_b:
5996 case Intrinsic::loongarch_lasx_xvsra_h:
5997 case Intrinsic::loongarch_lasx_xvsra_w:
5998 case Intrinsic::loongarch_lasx_xvsra_d:
5999 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6000 truncateVecElts(N, DAG));
6001 case Intrinsic::loongarch_lsx_vsrai_b:
6002 case Intrinsic::loongarch_lasx_xvsrai_b:
6003 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6004 lowerVectorSplatImm<3>(N, 2, DAG));
6005 case Intrinsic::loongarch_lsx_vsrai_h:
6006 case Intrinsic::loongarch_lasx_xvsrai_h:
6007 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6008 lowerVectorSplatImm<4>(N, 2, DAG));
6009 case Intrinsic::loongarch_lsx_vsrai_w:
6010 case Intrinsic::loongarch_lasx_xvsrai_w:
6011 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6012 lowerVectorSplatImm<5>(N, 2, DAG));
6013 case Intrinsic::loongarch_lsx_vsrai_d:
6014 case Intrinsic::loongarch_lasx_xvsrai_d:
6015 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6016 lowerVectorSplatImm<6>(N, 2, DAG));
6017 case Intrinsic::loongarch_lsx_vclz_b:
6018 case Intrinsic::loongarch_lsx_vclz_h:
6019 case Intrinsic::loongarch_lsx_vclz_w:
6020 case Intrinsic::loongarch_lsx_vclz_d:
6021 case Intrinsic::loongarch_lasx_xvclz_b:
6022 case Intrinsic::loongarch_lasx_xvclz_h:
6023 case Intrinsic::loongarch_lasx_xvclz_w:
6024 case Intrinsic::loongarch_lasx_xvclz_d:
6025 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6026 case Intrinsic::loongarch_lsx_vpcnt_b:
6027 case Intrinsic::loongarch_lsx_vpcnt_h:
6028 case Intrinsic::loongarch_lsx_vpcnt_w:
6029 case Intrinsic::loongarch_lsx_vpcnt_d:
6030 case Intrinsic::loongarch_lasx_xvpcnt_b:
6031 case Intrinsic::loongarch_lasx_xvpcnt_h:
6032 case Intrinsic::loongarch_lasx_xvpcnt_w:
6033 case Intrinsic::loongarch_lasx_xvpcnt_d:
6034 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6035 case Intrinsic::loongarch_lsx_vbitclr_b:
6036 case Intrinsic::loongarch_lsx_vbitclr_h:
6037 case Intrinsic::loongarch_lsx_vbitclr_w:
6038 case Intrinsic::loongarch_lsx_vbitclr_d:
6039 case Intrinsic::loongarch_lasx_xvbitclr_b:
6040 case Intrinsic::loongarch_lasx_xvbitclr_h:
6041 case Intrinsic::loongarch_lasx_xvbitclr_w:
6042 case Intrinsic::loongarch_lasx_xvbitclr_d:
6043 return lowerVectorBitClear(N, DAG);
6044 case Intrinsic::loongarch_lsx_vbitclri_b:
6045 case Intrinsic::loongarch_lasx_xvbitclri_b:
6046 return lowerVectorBitClearImm<3>(N, DAG);
6047 case Intrinsic::loongarch_lsx_vbitclri_h:
6048 case Intrinsic::loongarch_lasx_xvbitclri_h:
6049 return lowerVectorBitClearImm<4>(N, DAG);
6050 case Intrinsic::loongarch_lsx_vbitclri_w:
6051 case Intrinsic::loongarch_lasx_xvbitclri_w:
6052 return lowerVectorBitClearImm<5>(N, DAG);
6053 case Intrinsic::loongarch_lsx_vbitclri_d:
6054 case Intrinsic::loongarch_lasx_xvbitclri_d:
6055 return lowerVectorBitClearImm<6>(N, DAG);
6056 case Intrinsic::loongarch_lsx_vbitset_b:
6057 case Intrinsic::loongarch_lsx_vbitset_h:
6058 case Intrinsic::loongarch_lsx_vbitset_w:
6059 case Intrinsic::loongarch_lsx_vbitset_d:
6060 case Intrinsic::loongarch_lasx_xvbitset_b:
6061 case Intrinsic::loongarch_lasx_xvbitset_h:
6062 case Intrinsic::loongarch_lasx_xvbitset_w:
6063 case Intrinsic::loongarch_lasx_xvbitset_d: {
6064 EVT VecTy = N->getValueType(0);
6065 SDValue One = DAG.getConstant(1, DL, VecTy);
6066 return DAG.getNode(
6067 ISD::OR, DL, VecTy, N->getOperand(1),
6068 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6069 }
6070 case Intrinsic::loongarch_lsx_vbitseti_b:
6071 case Intrinsic::loongarch_lasx_xvbitseti_b:
6072 return lowerVectorBitSetImm<3>(N, DAG);
6073 case Intrinsic::loongarch_lsx_vbitseti_h:
6074 case Intrinsic::loongarch_lasx_xvbitseti_h:
6075 return lowerVectorBitSetImm<4>(N, DAG);
6076 case Intrinsic::loongarch_lsx_vbitseti_w:
6077 case Intrinsic::loongarch_lasx_xvbitseti_w:
6078 return lowerVectorBitSetImm<5>(N, DAG);
6079 case Intrinsic::loongarch_lsx_vbitseti_d:
6080 case Intrinsic::loongarch_lasx_xvbitseti_d:
6081 return lowerVectorBitSetImm<6>(N, DAG);
6082 case Intrinsic::loongarch_lsx_vbitrev_b:
6083 case Intrinsic::loongarch_lsx_vbitrev_h:
6084 case Intrinsic::loongarch_lsx_vbitrev_w:
6085 case Intrinsic::loongarch_lsx_vbitrev_d:
6086 case Intrinsic::loongarch_lasx_xvbitrev_b:
6087 case Intrinsic::loongarch_lasx_xvbitrev_h:
6088 case Intrinsic::loongarch_lasx_xvbitrev_w:
6089 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6090 EVT VecTy = N->getValueType(0);
6091 SDValue One = DAG.getConstant(1, DL, VecTy);
6092 return DAG.getNode(
6093 ISD::XOR, DL, VecTy, N->getOperand(1),
6094 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6095 }
6096 case Intrinsic::loongarch_lsx_vbitrevi_b:
6097 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6098 return lowerVectorBitRevImm<3>(N, DAG);
6099 case Intrinsic::loongarch_lsx_vbitrevi_h:
6100 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6101 return lowerVectorBitRevImm<4>(N, DAG);
6102 case Intrinsic::loongarch_lsx_vbitrevi_w:
6103 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6104 return lowerVectorBitRevImm<5>(N, DAG);
6105 case Intrinsic::loongarch_lsx_vbitrevi_d:
6106 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6107 return lowerVectorBitRevImm<6>(N, DAG);
6108 case Intrinsic::loongarch_lsx_vfadd_s:
6109 case Intrinsic::loongarch_lsx_vfadd_d:
6110 case Intrinsic::loongarch_lasx_xvfadd_s:
6111 case Intrinsic::loongarch_lasx_xvfadd_d:
6112 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6113 N->getOperand(2));
6114 case Intrinsic::loongarch_lsx_vfsub_s:
6115 case Intrinsic::loongarch_lsx_vfsub_d:
6116 case Intrinsic::loongarch_lasx_xvfsub_s:
6117 case Intrinsic::loongarch_lasx_xvfsub_d:
6118 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6119 N->getOperand(2));
6120 case Intrinsic::loongarch_lsx_vfmul_s:
6121 case Intrinsic::loongarch_lsx_vfmul_d:
6122 case Intrinsic::loongarch_lasx_xvfmul_s:
6123 case Intrinsic::loongarch_lasx_xvfmul_d:
6124 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6125 N->getOperand(2));
6126 case Intrinsic::loongarch_lsx_vfdiv_s:
6127 case Intrinsic::loongarch_lsx_vfdiv_d:
6128 case Intrinsic::loongarch_lasx_xvfdiv_s:
6129 case Intrinsic::loongarch_lasx_xvfdiv_d:
6130 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6131 N->getOperand(2));
6132 case Intrinsic::loongarch_lsx_vfmadd_s:
6133 case Intrinsic::loongarch_lsx_vfmadd_d:
6134 case Intrinsic::loongarch_lasx_xvfmadd_s:
6135 case Intrinsic::loongarch_lasx_xvfmadd_d:
6136 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6137 N->getOperand(2), N->getOperand(3));
6138 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6139 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6140 N->getOperand(1), N->getOperand(2),
6141 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6142 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6143 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6144 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6145 N->getOperand(1), N->getOperand(2),
6146 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6147 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6148 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6149 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6150 N->getOperand(1), N->getOperand(2),
6151 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6152 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6153 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6154 N->getOperand(1), N->getOperand(2),
6155 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6156 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6157 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6158 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6159 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6160 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6161 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6162 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6163 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6164 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6165 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6166 N->getOperand(1)));
6167 case Intrinsic::loongarch_lsx_vreplve_b:
6168 case Intrinsic::loongarch_lsx_vreplve_h:
6169 case Intrinsic::loongarch_lsx_vreplve_w:
6170 case Intrinsic::loongarch_lsx_vreplve_d:
6171 case Intrinsic::loongarch_lasx_xvreplve_b:
6172 case Intrinsic::loongarch_lasx_xvreplve_h:
6173 case Intrinsic::loongarch_lasx_xvreplve_w:
6174 case Intrinsic::loongarch_lasx_xvreplve_d:
6175 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6176 N->getOperand(1),
6177 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6178 N->getOperand(2)));
6179 }
6180 return SDValue();
6181}
6182
6183static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6184 TargetLowering::DAGCombinerInfo &DCI,
6185 const LoongArchSubtarget &Subtarget) {
6186 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6187 // conversion is unnecessary and can be replaced with the
6188 // MOVFR2GR_S_LA64 operand.
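  // That is, (MOVGR2FR_W_LA64 (MOVFR2GR_S_LA64 x)) folds to x.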
6189 SDValue Op0 = N->getOperand(0);
6190 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6191 return Op0.getOperand(0);
6192 return SDValue();
6193}
6194
6195static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6196 TargetLowering::DAGCombinerInfo &DCI,
6197 const LoongArchSubtarget &Subtarget) {
6198 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6199 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6200 // operand.
6201 SDValue Op0 = N->getOperand(0);
6202 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6203 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6204 "Unexpected value type!");
6205 return Op0.getOperand(0);
6206 }
6207 return SDValue();
6208}
6209
6210static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6211 TargetLowering::DAGCombinerInfo &DCI,
6212 const LoongArchSubtarget &Subtarget) {
6213 MVT VT = N->getSimpleValueType(0);
6214 unsigned NumBits = VT.getScalarSizeInBits();
6215
6216 // Simplify the inputs.
6217 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6218 APInt DemandedMask(APInt::getAllOnes(NumBits));
6219 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6220 return SDValue(N, 0);
6221
6222 return SDValue();
6223}
6224
6225static SDValue
6226performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6227 TargetLowering::DAGCombinerInfo &DCI,
6228 const LoongArchSubtarget &Subtarget) {
6229 SDValue Op0 = N->getOperand(0);
6230 SDLoc DL(N);
6231
6232 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6233 // redundant. Instead, use BuildPairF64's operands directly.
6234 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6235 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6236
6237 if (Op0->isUndef()) {
6238 SDValue Lo = DAG.getUNDEF(MVT::i32);
6239 SDValue Hi = DAG.getUNDEF(MVT::i32);
6240 return DCI.CombineTo(N, Lo, Hi);
6241 }
6242
6243 // It's cheaper to materialise two 32-bit integers than to load a double
6244 // from the constant pool and transfer it to integer registers through the
6245 // stack.
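  // For example, the double constant +1.0 (bit pattern 0x3FF0000000000000)
  // is split into Lo = 0x00000000 and Hi = 0x3FF00000.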
6246 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6247 APInt V = C->getValueAPF().bitcastToAPInt();
6248 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6249 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6250 return DCI.CombineTo(N, Lo, Hi);
6251 }
6252
6253 return SDValue();
6254}
6255
6256static SDValue
6257performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
6258 TargetLowering::DAGCombinerInfo &DCI,
6259 const LoongArchSubtarget &Subtarget) {
6260 if (!DCI.isBeforeLegalize())
6261 return SDValue();
6262
6263 MVT EltVT = N->getSimpleValueType(0);
6264 SDValue Vec = N->getOperand(0);
6265 EVT VecTy = Vec->getValueType(0);
6266 SDValue Idx = N->getOperand(1);
6267 unsigned IdxOp = Idx.getOpcode();
6268 SDLoc DL(N);
6269
6270 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6271 return SDValue();
6272
6273 // Combine:
6274 // t2 = truncate t1
6275 // t3 = {zero/sign/any}_extend t2
6276 // t4 = extract_vector_elt t0, t3
6277 // to:
6278 // t4 = extract_vector_elt t0, t1
6279 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6280 IdxOp == ISD::ANY_EXTEND) {
6281 SDValue IdxOrig = Idx.getOperand(0);
6282 if (IdxOrig.getOpcode() != ISD::TRUNCATE)
6283 return SDValue();
6284
6285 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6286 IdxOrig.getOperand(0));
6287 }
6288
6289 return SDValue();
6290}
6291
6292SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6293 DAGCombinerInfo &DCI) const {
6294 SelectionDAG &DAG = DCI.DAG;
6295 switch (N->getOpcode()) {
6296 default:
6297 break;
6298 case ISD::AND:
6299 return performANDCombine(N, DAG, DCI, Subtarget);
6300 case ISD::OR:
6301 return performORCombine(N, DAG, DCI, Subtarget);
6302 case ISD::SETCC:
6303 return performSETCCCombine(N, DAG, DCI, Subtarget);
6304 case ISD::SRL:
6305 return performSRLCombine(N, DAG, DCI, Subtarget);
6306 case ISD::BITCAST:
6307 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6308 case LoongArchISD::BITREV_W:
6309 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6310 case LoongArchISD::BR_CC:
6311 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6312 case LoongArchISD::SELECT_CC:
6313 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6314 case ISD::INTRINSIC_WO_CHAIN:
6315 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6316 case LoongArchISD::MOVGR2FR_W_LA64:
6317 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6318 case LoongArchISD::MOVFR2GR_S_LA64:
6319 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6320 case LoongArchISD::VMSKLTZ:
6321 case LoongArchISD::XVMSKLTZ:
6322 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6323 case LoongArchISD::SPLIT_PAIR_F64:
6324 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6325 case ISD::EXTRACT_VECTOR_ELT:
6326 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6327 }
6328 return SDValue();
6329}
6330
6331static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB) {
6332 if (!ZeroDivCheck)
6334 return MBB;
6335
6336 // Build instructions:
6337 // MBB:
6338 // div(or mod) $dst, $dividend, $divisor
6339 // bne $divisor, $zero, SinkMBB
6340 // BreakMBB:
6341 // break 7 // BRK_DIVZERO
6342 // SinkMBB:
6343 // fallthrough
6344 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6345 MachineFunction::iterator It = ++MBB->getIterator();
6346 MachineFunction *MF = MBB->getParent();
6347 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6348 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6349 MF->insert(It, BreakMBB);
6350 MF->insert(It, SinkMBB);
6351
6352 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6353 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6354 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6355
6356 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6357 DebugLoc DL = MI.getDebugLoc();
6358 MachineOperand &Divisor = MI.getOperand(2);
6359 Register DivisorReg = Divisor.getReg();
6360
6361 // MBB:
6362 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6363 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6364 .addReg(LoongArch::R0)
6365 .addMBB(SinkMBB);
6366 MBB->addSuccessor(BreakMBB);
6367 MBB->addSuccessor(SinkMBB);
6368
6369 // BreakMBB:
6370 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6371 // definition of BRK_DIVZERO.
6372 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6373 BreakMBB->addSuccessor(SinkMBB);
6374
6375 // Clear Divisor's kill flag.
6376 Divisor.setIsKill(false);
6377
6378 return SinkMBB;
6379}
6380
6381static MachineBasicBlock *
6382emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6383 const LoongArchSubtarget &Subtarget) {
6384 unsigned CondOpc;
6385 switch (MI.getOpcode()) {
6386 default:
6387 llvm_unreachable("Unexpected opcode");
6388 case LoongArch::PseudoVBZ:
6389 CondOpc = LoongArch::VSETEQZ_V;
6390 break;
6391 case LoongArch::PseudoVBZ_B:
6392 CondOpc = LoongArch::VSETANYEQZ_B;
6393 break;
6394 case LoongArch::PseudoVBZ_H:
6395 CondOpc = LoongArch::VSETANYEQZ_H;
6396 break;
6397 case LoongArch::PseudoVBZ_W:
6398 CondOpc = LoongArch::VSETANYEQZ_W;
6399 break;
6400 case LoongArch::PseudoVBZ_D:
6401 CondOpc = LoongArch::VSETANYEQZ_D;
6402 break;
6403 case LoongArch::PseudoVBNZ:
6404 CondOpc = LoongArch::VSETNEZ_V;
6405 break;
6406 case LoongArch::PseudoVBNZ_B:
6407 CondOpc = LoongArch::VSETALLNEZ_B;
6408 break;
6409 case LoongArch::PseudoVBNZ_H:
6410 CondOpc = LoongArch::VSETALLNEZ_H;
6411 break;
6412 case LoongArch::PseudoVBNZ_W:
6413 CondOpc = LoongArch::VSETALLNEZ_W;
6414 break;
6415 case LoongArch::PseudoVBNZ_D:
6416 CondOpc = LoongArch::VSETALLNEZ_D;
6417 break;
6418 case LoongArch::PseudoXVBZ:
6419 CondOpc = LoongArch::XVSETEQZ_V;
6420 break;
6421 case LoongArch::PseudoXVBZ_B:
6422 CondOpc = LoongArch::XVSETANYEQZ_B;
6423 break;
6424 case LoongArch::PseudoXVBZ_H:
6425 CondOpc = LoongArch::XVSETANYEQZ_H;
6426 break;
6427 case LoongArch::PseudoXVBZ_W:
6428 CondOpc = LoongArch::XVSETANYEQZ_W;
6429 break;
6430 case LoongArch::PseudoXVBZ_D:
6431 CondOpc = LoongArch::XVSETANYEQZ_D;
6432 break;
6433 case LoongArch::PseudoXVBNZ:
6434 CondOpc = LoongArch::XVSETNEZ_V;
6435 break;
6436 case LoongArch::PseudoXVBNZ_B:
6437 CondOpc = LoongArch::XVSETALLNEZ_B;
6438 break;
6439 case LoongArch::PseudoXVBNZ_H:
6440 CondOpc = LoongArch::XVSETALLNEZ_H;
6441 break;
6442 case LoongArch::PseudoXVBNZ_W:
6443 CondOpc = LoongArch::XVSETALLNEZ_W;
6444 break;
6445 case LoongArch::PseudoXVBNZ_D:
6446 CondOpc = LoongArch::XVSETALLNEZ_D;
6447 break;
6448 }
6449
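  // Materialize the boolean result of the vector condition: branch on the FCC
  // result of CondOpc, produce 0 in FalseBB and 1 in TrueBB, and merge the two
  // values with a PHI in SinkBB.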
6450 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6451 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6452 DebugLoc DL = MI.getDebugLoc();
6453 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6454 MachineFunction::iterator It = ++BB->getIterator();
6455
6456 MachineFunction *F = BB->getParent();
6457 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6458 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6459 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6460
6461 F->insert(It, FalseBB);
6462 F->insert(It, TrueBB);
6463 F->insert(It, SinkBB);
6464
6465 // Transfer the remainder of MBB and its successor edges to Sink.
6466 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6467 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6468
6469 // Insert the real instruction to BB.
6470 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6471 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6472
6473 // Insert branch.
6474 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6475 BB->addSuccessor(FalseBB);
6476 BB->addSuccessor(TrueBB);
6477
6478 // FalseBB.
6479 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6480 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6481 .addReg(LoongArch::R0)
6482 .addImm(0);
6483 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6484 FalseBB->addSuccessor(SinkBB);
6485
6486 // TrueBB.
6487 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6488 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6489 .addReg(LoongArch::R0)
6490 .addImm(1);
6491 TrueBB->addSuccessor(SinkBB);
6492
6493 // SinkBB: merge the results.
6494 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6495 MI.getOperand(0).getReg())
6496 .addReg(RD1)
6497 .addMBB(FalseBB)
6498 .addReg(RD2)
6499 .addMBB(TrueBB);
6500
6501 // The pseudo instruction is gone now.
6502 MI.eraseFromParent();
6503 return SinkBB;
6504}
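// The expansion above turns a vector "all/any (non)zero" pseudo into a small
// diamond: the VSET*/XVSET* instruction writes a condition-flag register,
// BCNEZ branches to TrueBB when the flag is set, each arm materialises 0 or 1
// with ADDI_W from $zero, and the PHI in SinkBB merges the two constants into
// the pseudo's original destination register.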
6505
6506static MachineBasicBlock *
6508 const LoongArchSubtarget &Subtarget) {
6509 unsigned InsOp;
6510 unsigned BroadcastOp;
6511 unsigned HalfSize;
6512 switch (MI.getOpcode()) {
6513 default:
6514 llvm_unreachable("Unexpected opcode");
6515 case LoongArch::PseudoXVINSGR2VR_B:
6516 HalfSize = 16;
6517 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6518 InsOp = LoongArch::XVEXTRINS_B;
6519 break;
6520 case LoongArch::PseudoXVINSGR2VR_H:
6521 HalfSize = 8;
6522 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6523 InsOp = LoongArch::XVEXTRINS_H;
6524 break;
6525 }
6526 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6527 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6528 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6529 DebugLoc DL = MI.getDebugLoc();
6530 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6531 // XDst = vector_insert XSrc, Elt, Idx
6532 Register XDst = MI.getOperand(0).getReg();
6533 Register XSrc = MI.getOperand(1).getReg();
6534 Register Elt = MI.getOperand(2).getReg();
6535 unsigned Idx = MI.getOperand(3).getImm();
6536
6537 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6538 Idx < HalfSize) {
6539 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6540 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6541
6542 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6543 .addReg(XSrc, 0, LoongArch::sub_128);
6544 BuildMI(*BB, MI, DL,
6545 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6546 : LoongArch::VINSGR2VR_B),
6547 ScratchSubReg2)
6548 .addReg(ScratchSubReg1)
6549 .addReg(Elt)
6550 .addImm(Idx);
6551
6552 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6553 .addImm(0)
6554 .addReg(ScratchSubReg2)
6555 .addImm(LoongArch::sub_128);
6556 } else {
6557 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6558 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6559
6560 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6561
6562 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6563 .addReg(ScratchReg1)
6564 .addReg(XSrc)
6565 .addImm(Idx >= HalfSize ? 48 : 18);
6566
6567 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6568 .addReg(XSrc)
6569 .addReg(ScratchReg2)
6570 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6571 }
6572
6573 MI.eraseFromParent();
6574 return BB;
6575}
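// Worked example for the general (else) path above, for PseudoXVINSGR2VR_H
// with Idx = 11 and HalfSize = 8: the scalar is broadcast with XVREPLGR2VR_H,
// XVPERMI_Q gets immediate 48 (since Idx >= HalfSize), and XVEXTRINS_H gets
// immediate (11 - 8) * 17 = 51 = 0x33. Multiplying by 17 places the same lane
// index in both nibbles of the immediate, because n * 17 == (n << 4) | n for
// n < 16, so the element ends up in the corresponding lane of XDst.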
6576
6577 static MachineBasicBlock *
6578 emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB,
6579 const LoongArchSubtarget &Subtarget) {
6580 assert(Subtarget.hasExtLSX());
6581 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6582 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6583 DebugLoc DL = MI.getDebugLoc();
6584 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6585 Register Dst = MI.getOperand(0).getReg();
6586 Register Src = MI.getOperand(1).getReg();
6587 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6588 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6589 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6590
6591 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6592 BuildMI(*BB, MI, DL,
6593 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6594 : LoongArch::VINSGR2VR_W),
6595 ScratchReg2)
6596 .addReg(ScratchReg1)
6597 .addReg(Src)
6598 .addImm(0);
6599 BuildMI(
6600 *BB, MI, DL,
6601 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6602 ScratchReg3)
6603 .addReg(ScratchReg2);
6604 BuildMI(*BB, MI, DL,
6605 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6606 : LoongArch::VPICKVE2GR_W),
6607 Dst)
6608 .addReg(ScratchReg3)
6609 .addImm(0);
6610
6611 MI.eraseFromParent();
6612 return BB;
6613}
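// In other words, a scalar CTPOP is routed through the LSX unit: VLDI zeroes
// a scratch vector, VINSGR2VR_{W,D} inserts the GPR operand into lane 0,
// VPCNT_{W,D} computes the per-element population count, and VPICKVE2GR_{W,D}
// moves lane 0 of the result back to the destination GPR.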
6614
6615static MachineBasicBlock *
6616 emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6617 const LoongArchSubtarget &Subtarget) {
6618 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6619 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6620 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6621 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6622 Register Dst = MI.getOperand(0).getReg();
6623 Register Src = MI.getOperand(1).getReg();
6624 DebugLoc DL = MI.getDebugLoc();
6625 unsigned EleBits = 8;
6626 unsigned NotOpc = 0;
6627 unsigned MskOpc;
6628
6629 switch (MI.getOpcode()) {
6630 default:
6631 llvm_unreachable("Unexpected opcode");
6632 case LoongArch::PseudoVMSKLTZ_B:
6633 MskOpc = LoongArch::VMSKLTZ_B;
6634 break;
6635 case LoongArch::PseudoVMSKLTZ_H:
6636 MskOpc = LoongArch::VMSKLTZ_H;
6637 EleBits = 16;
6638 break;
6639 case LoongArch::PseudoVMSKLTZ_W:
6640 MskOpc = LoongArch::VMSKLTZ_W;
6641 EleBits = 32;
6642 break;
6643 case LoongArch::PseudoVMSKLTZ_D:
6644 MskOpc = LoongArch::VMSKLTZ_D;
6645 EleBits = 64;
6646 break;
6647 case LoongArch::PseudoVMSKGEZ_B:
6648 MskOpc = LoongArch::VMSKGEZ_B;
6649 break;
6650 case LoongArch::PseudoVMSKEQZ_B:
6651 MskOpc = LoongArch::VMSKNZ_B;
6652 NotOpc = LoongArch::VNOR_V;
6653 break;
6654 case LoongArch::PseudoVMSKNEZ_B:
6655 MskOpc = LoongArch::VMSKNZ_B;
6656 break;
6657 case LoongArch::PseudoXVMSKLTZ_B:
6658 MskOpc = LoongArch::XVMSKLTZ_B;
6659 RC = &LoongArch::LASX256RegClass;
6660 break;
6661 case LoongArch::PseudoXVMSKLTZ_H:
6662 MskOpc = LoongArch::XVMSKLTZ_H;
6663 RC = &LoongArch::LASX256RegClass;
6664 EleBits = 16;
6665 break;
6666 case LoongArch::PseudoXVMSKLTZ_W:
6667 MskOpc = LoongArch::XVMSKLTZ_W;
6668 RC = &LoongArch::LASX256RegClass;
6669 EleBits = 32;
6670 break;
6671 case LoongArch::PseudoXVMSKLTZ_D:
6672 MskOpc = LoongArch::XVMSKLTZ_D;
6673 RC = &LoongArch::LASX256RegClass;
6674 EleBits = 64;
6675 break;
6676 case LoongArch::PseudoXVMSKGEZ_B:
6677 MskOpc = LoongArch::XVMSKGEZ_B;
6678 RC = &LoongArch::LASX256RegClass;
6679 break;
6680 case LoongArch::PseudoXVMSKEQZ_B:
6681 MskOpc = LoongArch::XVMSKNZ_B;
6682 NotOpc = LoongArch::XVNOR_V;
6683 RC = &LoongArch::LASX256RegClass;
6684 break;
6685 case LoongArch::PseudoXVMSKNEZ_B:
6686 MskOpc = LoongArch::XVMSKNZ_B;
6687 RC = &LoongArch::LASX256RegClass;
6688 break;
6689 }
6690
6691 Register Msk = MRI.createVirtualRegister(RC);
6692 if (NotOpc) {
6693 Register Tmp = MRI.createVirtualRegister(RC);
6694 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6695 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6696 .addReg(Tmp, RegState::Kill)
6697 .addReg(Tmp, RegState::Kill);
6698 } else {
6699 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6700 }
6701
6702 if (TRI->getRegSizeInBits(*RC) > 128) {
6703 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6704 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6705 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6706 .addReg(Msk)
6707 .addImm(0);
6708 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6709 .addReg(Msk, RegState::Kill)
6710 .addImm(4);
6711 BuildMI(*BB, MI, DL,
6712 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6713 : LoongArch::BSTRINS_W),
6714 Dst)
6715 .addReg(Lo, RegState::Kill)
6716 .addReg(Hi, RegState::Kill)
6717 .addImm(256 / EleBits - 1)
6718 .addImm(128 / EleBits);
6719 } else {
6720 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6721 .addReg(Msk, RegState::Kill)
6722 .addImm(0);
6723 }
6724
6725 MI.eraseFromParent();
6726 return BB;
6727}
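// For the 256-bit (LASX) variants the mask produced by [X]VMSK* has one bit
// per element, i.e. 256 / EleBits bits in total. The two 128-bit halves are
// read out as 32-bit values with XVPICKVE2GR_WU (word indices 0 and 4), and
// the high half is inserted above the low half with BSTRINS over the bit
// range [256/EleBits - 1, 128/EleBits]; for byte elements that is bits
// [31:16].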
6728
6729static MachineBasicBlock *
6730 emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6731 const LoongArchSubtarget &Subtarget) {
6732 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6733 "Unexpected instruction");
6734
6735 MachineFunction &MF = *BB->getParent();
6736 DebugLoc DL = MI.getDebugLoc();
6737 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6738 Register LoReg = MI.getOperand(0).getReg();
6739 Register HiReg = MI.getOperand(1).getReg();
6740 Register SrcReg = MI.getOperand(2).getReg();
6741
6742 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6743 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6744 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6745 MI.eraseFromParent(); // The pseudo instruction is gone now.
6746 return BB;
6747}
6748
6749static MachineBasicBlock *
6750 emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6751 const LoongArchSubtarget &Subtarget) {
6752 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6753 "Unexpected instruction");
6754
6755 MachineFunction &MF = *BB->getParent();
6756 DebugLoc DL = MI.getDebugLoc();
6757 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6758 MachineRegisterInfo &MRI = MF.getRegInfo();
6759 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6760 Register DstReg = MI.getOperand(0).getReg();
6761 Register LoReg = MI.getOperand(1).getReg();
6762 Register HiReg = MI.getOperand(2).getReg();
6763
6764 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6765 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6766 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6767 .addReg(TmpReg, RegState::Kill)
6768 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6769 MI.eraseFromParent(); // The pseudo instruction is gone now.
6770 return BB;
6771}
6772
6773 static bool isSelectPseudo(MachineInstr &MI) {
6774 switch (MI.getOpcode()) {
6775 default:
6776 return false;
6777 case LoongArch::Select_GPR_Using_CC_GPR:
6778 return true;
6779 }
6780}
6781
6782static MachineBasicBlock *
6783 emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6784 const LoongArchSubtarget &Subtarget) {
6785 // To "insert" Select_* instructions, we actually have to insert the triangle
6786 // control-flow pattern. The incoming instructions know the destination vreg
6787 // to set, the condition code register to branch on, the true/false values to
6788 // select between, and the condcode to use to select the appropriate branch.
6789 //
6790 // We produce the following control flow:
6791 // HeadMBB
6792 // | \
6793 // | IfFalseMBB
6794 // | /
6795 // TailMBB
6796 //
6797 // When we find a sequence of selects we attempt to optimize their emission
6798 // by sharing the control flow. Currently we only handle cases where we have
6799 // multiple selects with the exact same condition (same LHS, RHS and CC).
6800 // The selects may be interleaved with other instructions if the other
6801 // instructions meet some requirements we deem safe:
6802 // - They are not pseudo instructions.
6803 // - They are debug instructions, or otherwise:
6804 // - They do not have side-effects, do not access memory and their inputs do
6805 // not depend on the results of the select pseudo-instructions.
6806 // The TrueV/FalseV operands of the selects cannot depend on the result of
6807 // previous selects in the sequence.
6808 // These conditions could be further relaxed. See the X86 target for a
6809 // related approach and more information.
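// For example, two back-to-back selects on the same (LHS, RHS, CC), such as
//   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
//   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t2, %f2
// share a single conditional branch and become two PHIs in TailMBB:
//   %a = phi [ %t1, HeadMBB ], [ %f1, IfFalseMBB ]
//   %b = phi [ %t2, HeadMBB ], [ %f2, IfFalseMBB ]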
6810
6811 Register LHS = MI.getOperand(1).getReg();
6812 Register RHS;
6813 if (MI.getOperand(2).isReg())
6814 RHS = MI.getOperand(2).getReg();
6815 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6816
6817 SmallVector<MachineInstr *, 4> SelectDebugValues;
6818 SmallSet<Register, 4> SelectDests;
6819 SelectDests.insert(MI.getOperand(0).getReg());
6820
6821 MachineInstr *LastSelectPseudo = &MI;
6822 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6823 SequenceMBBI != E; ++SequenceMBBI) {
6824 if (SequenceMBBI->isDebugInstr())
6825 continue;
6826 if (isSelectPseudo(*SequenceMBBI)) {
6827 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6828 !SequenceMBBI->getOperand(2).isReg() ||
6829 SequenceMBBI->getOperand(2).getReg() != RHS ||
6830 SequenceMBBI->getOperand(3).getImm() != CC ||
6831 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6832 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6833 break;
6834 LastSelectPseudo = &*SequenceMBBI;
6835 SequenceMBBI->collectDebugValues(SelectDebugValues);
6836 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6837 continue;
6838 }
6839 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6840 SequenceMBBI->mayLoadOrStore() ||
6841 SequenceMBBI->usesCustomInsertionHook())
6842 break;
6843 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6844 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6845 }))
6846 break;
6847 }
6848
6849 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6850 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6851 DebugLoc DL = MI.getDebugLoc();
6852 MachineFunction::iterator I = ++BB->getIterator();
6853
6854 MachineBasicBlock *HeadMBB = BB;
6855 MachineFunction *F = BB->getParent();
6856 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6857 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6858
6859 F->insert(I, IfFalseMBB);
6860 F->insert(I, TailMBB);
6861
6862 // Set the call frame size on entry to the new basic blocks.
6863 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6864 IfFalseMBB->setCallFrameSize(CallFrameSize);
6865 TailMBB->setCallFrameSize(CallFrameSize);
6866
6867 // Transfer debug instructions associated with the selects to TailMBB.
6868 for (MachineInstr *DebugInstr : SelectDebugValues) {
6869 TailMBB->push_back(DebugInstr->removeFromParent());
6870 }
6871
6872 // Move all instructions after the sequence to TailMBB.
6873 TailMBB->splice(TailMBB->end(), HeadMBB,
6874 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6875 // Update machine-CFG edges by transferring all successors of the current
6876 // block to the new block which will contain the Phi nodes for the selects.
6877 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6878 // Set the successors for HeadMBB.
6879 HeadMBB->addSuccessor(IfFalseMBB);
6880 HeadMBB->addSuccessor(TailMBB);
6881
6882 // Insert appropriate branch.
6883 if (MI.getOperand(2).isImm())
6884 BuildMI(HeadMBB, DL, TII.get(CC))
6885 .addReg(LHS)
6886 .addImm(MI.getOperand(2).getImm())
6887 .addMBB(TailMBB);
6888 else
6889 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6890
6891 // IfFalseMBB just falls through to TailMBB.
6892 IfFalseMBB->addSuccessor(TailMBB);
6893
6894 // Create PHIs for all of the select pseudo-instructions.
6895 auto SelectMBBI = MI.getIterator();
6896 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
6897 auto InsertionPoint = TailMBB->begin();
6898 while (SelectMBBI != SelectEnd) {
6899 auto Next = std::next(SelectMBBI);
6900 if (isSelectPseudo(*SelectMBBI)) {
6901 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6902 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
6903 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
6904 .addReg(SelectMBBI->getOperand(4).getReg())
6905 .addMBB(HeadMBB)
6906 .addReg(SelectMBBI->getOperand(5).getReg())
6907 .addMBB(IfFalseMBB);
6908 SelectMBBI->eraseFromParent();
6909 }
6910 SelectMBBI = Next;
6911 }
6912
6913 F->getProperties().resetNoPHIs();
6914 return TailMBB;
6915}
6916
6917MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6918 MachineInstr &MI, MachineBasicBlock *BB) const {
6919 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6920 DebugLoc DL = MI.getDebugLoc();
6921
6922 switch (MI.getOpcode()) {
6923 default:
6924 llvm_unreachable("Unexpected instr type to insert");
6925 case LoongArch::DIV_W:
6926 case LoongArch::DIV_WU:
6927 case LoongArch::MOD_W:
6928 case LoongArch::MOD_WU:
6929 case LoongArch::DIV_D:
6930 case LoongArch::DIV_DU:
6931 case LoongArch::MOD_D:
6932 case LoongArch::MOD_DU:
6933 return insertDivByZeroTrap(MI, BB);
6934 break;
6935 case LoongArch::WRFCSR: {
6936 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
6937 LoongArch::FCSR0 + MI.getOperand(0).getImm())
6938 .addReg(MI.getOperand(1).getReg());
6939 MI.eraseFromParent();
6940 return BB;
6941 }
6942 case LoongArch::RDFCSR: {
6943 MachineInstr *ReadFCSR =
6944 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
6945 MI.getOperand(0).getReg())
6946 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
6947 ReadFCSR->getOperand(1).setIsUndef();
6948 MI.eraseFromParent();
6949 return BB;
6950 }
6951 case LoongArch::Select_GPR_Using_CC_GPR:
6952 return emitSelectPseudo(MI, BB, Subtarget);
6953 case LoongArch::BuildPairF64Pseudo:
6954 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
6955 case LoongArch::SplitPairF64Pseudo:
6956 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
6957 case LoongArch::PseudoVBZ:
6958 case LoongArch::PseudoVBZ_B:
6959 case LoongArch::PseudoVBZ_H:
6960 case LoongArch::PseudoVBZ_W:
6961 case LoongArch::PseudoVBZ_D:
6962 case LoongArch::PseudoVBNZ:
6963 case LoongArch::PseudoVBNZ_B:
6964 case LoongArch::PseudoVBNZ_H:
6965 case LoongArch::PseudoVBNZ_W:
6966 case LoongArch::PseudoVBNZ_D:
6967 case LoongArch::PseudoXVBZ:
6968 case LoongArch::PseudoXVBZ_B:
6969 case LoongArch::PseudoXVBZ_H:
6970 case LoongArch::PseudoXVBZ_W:
6971 case LoongArch::PseudoXVBZ_D:
6972 case LoongArch::PseudoXVBNZ:
6973 case LoongArch::PseudoXVBNZ_B:
6974 case LoongArch::PseudoXVBNZ_H:
6975 case LoongArch::PseudoXVBNZ_W:
6976 case LoongArch::PseudoXVBNZ_D:
6977 return emitVecCondBranchPseudo(MI, BB, Subtarget);
6978 case LoongArch::PseudoXVINSGR2VR_B:
6979 case LoongArch::PseudoXVINSGR2VR_H:
6980 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
6981 case LoongArch::PseudoCTPOP:
6982 return emitPseudoCTPOP(MI, BB, Subtarget);
6983 case LoongArch::PseudoVMSKLTZ_B:
6984 case LoongArch::PseudoVMSKLTZ_H:
6985 case LoongArch::PseudoVMSKLTZ_W:
6986 case LoongArch::PseudoVMSKLTZ_D:
6987 case LoongArch::PseudoVMSKGEZ_B:
6988 case LoongArch::PseudoVMSKEQZ_B:
6989 case LoongArch::PseudoVMSKNEZ_B:
6990 case LoongArch::PseudoXVMSKLTZ_B:
6991 case LoongArch::PseudoXVMSKLTZ_H:
6992 case LoongArch::PseudoXVMSKLTZ_W:
6993 case LoongArch::PseudoXVMSKLTZ_D:
6994 case LoongArch::PseudoXVMSKGEZ_B:
6995 case LoongArch::PseudoXVMSKEQZ_B:
6996 case LoongArch::PseudoXVMSKNEZ_B:
6997 return emitPseudoVMSKCOND(MI, BB, Subtarget);
6998 case TargetOpcode::STATEPOINT:
6999 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7000 // while bl call instruction (where statepoint will be lowered at the
7001 // end) has implicit def. This def is early-clobber as it will be set at
7002 // the moment of the call and earlier than any use is read.
7003 // Add this implicit dead def here as a workaround.
7004 MI.addOperand(*MI.getMF(),
7005 MachineOperand::CreateReg(
7006 LoongArch::R1, /*isDef*/ true,
7007 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7008 /*isUndef*/ false, /*isEarlyClobber*/ true));
7009 if (!Subtarget.is64Bit())
7010 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7011 return emitPatchPoint(MI, BB);
7012 }
7013}
7014
7016 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7017 unsigned *Fast) const {
7018 if (!Subtarget.hasUAL())
7019 return false;
7020
7021 // TODO: set reasonable speed number.
7022 if (Fast)
7023 *Fast = 1;
7024 return true;
7025}
7026
7027const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7028 switch ((LoongArchISD::NodeType)Opcode) {
7029 case LoongArchISD::FIRST_NUMBER:
7030 break;
7031
7032#define NODE_NAME_CASE(node) \
7033 case LoongArchISD::node: \
7034 return "LoongArchISD::" #node;
7035
7036 // TODO: Add more target-dependent nodes later.
7037 NODE_NAME_CASE(CALL)
7038 NODE_NAME_CASE(CALL_MEDIUM)
7039 NODE_NAME_CASE(CALL_LARGE)
7040 NODE_NAME_CASE(RET)
7041 NODE_NAME_CASE(TAIL)
7042 NODE_NAME_CASE(TAIL_MEDIUM)
7043 NODE_NAME_CASE(TAIL_LARGE)
7044 NODE_NAME_CASE(SELECT_CC)
7045 NODE_NAME_CASE(BR_CC)
7046 NODE_NAME_CASE(BRCOND)
7047 NODE_NAME_CASE(SLL_W)
7048 NODE_NAME_CASE(SRA_W)
7049 NODE_NAME_CASE(SRL_W)
7050 NODE_NAME_CASE(BSTRINS)
7051 NODE_NAME_CASE(BSTRPICK)
7052 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7053 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7054 NODE_NAME_CASE(FTINT)
7055 NODE_NAME_CASE(BUILD_PAIR_F64)
7056 NODE_NAME_CASE(SPLIT_PAIR_F64)
7057 NODE_NAME_CASE(REVB_2H)
7058 NODE_NAME_CASE(REVB_2W)
7059 NODE_NAME_CASE(BITREV_4B)
7060 NODE_NAME_CASE(BITREV_8B)
7061 NODE_NAME_CASE(BITREV_W)
7062 NODE_NAME_CASE(ROTR_W)
7063 NODE_NAME_CASE(ROTL_W)
7064 NODE_NAME_CASE(DIV_W)
7065 NODE_NAME_CASE(DIV_WU)
7066 NODE_NAME_CASE(MOD_W)
7067 NODE_NAME_CASE(MOD_WU)
7068 NODE_NAME_CASE(CLZ_W)
7069 NODE_NAME_CASE(CTZ_W)
7070 NODE_NAME_CASE(DBAR)
7071 NODE_NAME_CASE(IBAR)
7072 NODE_NAME_CASE(BREAK)
7073 NODE_NAME_CASE(SYSCALL)
7074 NODE_NAME_CASE(CRC_W_B_W)
7075 NODE_NAME_CASE(CRC_W_H_W)
7076 NODE_NAME_CASE(CRC_W_W_W)
7077 NODE_NAME_CASE(CRC_W_D_W)
7078 NODE_NAME_CASE(CRCC_W_B_W)
7079 NODE_NAME_CASE(CRCC_W_H_W)
7080 NODE_NAME_CASE(CRCC_W_W_W)
7081 NODE_NAME_CASE(CRCC_W_D_W)
7082 NODE_NAME_CASE(CSRRD)
7083 NODE_NAME_CASE(CSRWR)
7084 NODE_NAME_CASE(CSRXCHG)
7085 NODE_NAME_CASE(IOCSRRD_B)
7086 NODE_NAME_CASE(IOCSRRD_H)
7087 NODE_NAME_CASE(IOCSRRD_W)
7088 NODE_NAME_CASE(IOCSRRD_D)
7089 NODE_NAME_CASE(IOCSRWR_B)
7090 NODE_NAME_CASE(IOCSRWR_H)
7091 NODE_NAME_CASE(IOCSRWR_W)
7092 NODE_NAME_CASE(IOCSRWR_D)
7093 NODE_NAME_CASE(CPUCFG)
7094 NODE_NAME_CASE(MOVGR2FCSR)
7095 NODE_NAME_CASE(MOVFCSR2GR)
7096 NODE_NAME_CASE(CACOP_D)
7097 NODE_NAME_CASE(CACOP_W)
7098 NODE_NAME_CASE(VSHUF)
7099 NODE_NAME_CASE(VPICKEV)
7100 NODE_NAME_CASE(VPICKOD)
7101 NODE_NAME_CASE(VPACKEV)
7102 NODE_NAME_CASE(VPACKOD)
7103 NODE_NAME_CASE(VILVL)
7104 NODE_NAME_CASE(VILVH)
7105 NODE_NAME_CASE(VSHUF4I)
7106 NODE_NAME_CASE(VREPLVEI)
7107 NODE_NAME_CASE(VREPLGR2VR)
7108 NODE_NAME_CASE(XVPERMI)
7109 NODE_NAME_CASE(XVPERM)
7110 NODE_NAME_CASE(VPICK_SEXT_ELT)
7111 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7112 NODE_NAME_CASE(VREPLVE)
7113 NODE_NAME_CASE(VALL_ZERO)
7114 NODE_NAME_CASE(VANY_ZERO)
7115 NODE_NAME_CASE(VALL_NONZERO)
7116 NODE_NAME_CASE(VANY_NONZERO)
7117 NODE_NAME_CASE(FRECIPE)
7118 NODE_NAME_CASE(FRSQRTE)
7119 NODE_NAME_CASE(VSLLI)
7120 NODE_NAME_CASE(VSRLI)
7121 NODE_NAME_CASE(VBSLL)
7122 NODE_NAME_CASE(VBSRL)
7123 NODE_NAME_CASE(VLDREPL)
7124 NODE_NAME_CASE(VMSKLTZ)
7125 NODE_NAME_CASE(VMSKGEZ)
7126 NODE_NAME_CASE(VMSKEQZ)
7127 NODE_NAME_CASE(VMSKNEZ)
7128 NODE_NAME_CASE(XVMSKLTZ)
7129 NODE_NAME_CASE(XVMSKGEZ)
7130 NODE_NAME_CASE(XVMSKEQZ)
7131 NODE_NAME_CASE(XVMSKNEZ)
7132 NODE_NAME_CASE(VHADDW)
7133 }
7134#undef NODE_NAME_CASE
7135 return nullptr;
7136}
7137
7138//===----------------------------------------------------------------------===//
7139// Calling Convention Implementation
7140//===----------------------------------------------------------------------===//
7141
7142 // Eight general-purpose registers a0-a7 are used for passing integer
7143 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
7144 // to pass fixed-point arguments, and floating-point arguments when no FPR is
7145 // available or with the soft-float ABI.
7146const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7147 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7148 LoongArch::R10, LoongArch::R11};
7149 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7150 // arguments, and fa0-fa1 are also used to return values.
7151const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7152 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7153 LoongArch::F6, LoongArch::F7};
7154// FPR32 and FPR64 alias each other.
7155 const MCPhysReg ArgFPR64s[] = {
7156 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7157 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7158
7159const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7160 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7161 LoongArch::VR6, LoongArch::VR7};
7162
7163const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7164 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7165 LoongArch::XR6, LoongArch::XR7};
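// Illustrative example: under an ABI with FPRs available, a call such as
// f(int a, double b, const char *c) passes 'a' in the first free argument
// GPR, 'b' in the first free FPR64 and 'c' in the next free GPR. Once the
// eight argument FPRs are exhausted, or under a soft-float ABI, further
// floating-point values fall back to GPRs or to the stack, as implemented in
// CC_LoongArch below.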
7166
7167// Pass a 2*GRLen argument that has been split into two GRLen values through
7168// registers or the stack as necessary.
7169static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7170 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7171 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7172 ISD::ArgFlagsTy ArgFlags2) {
7173 unsigned GRLenInBytes = GRLen / 8;
7174 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7175 // At least one half can be passed via register.
7176 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7177 VA1.getLocVT(), CCValAssign::Full));
7178 } else {
7179 // Both halves must be passed on the stack, with proper alignment.
7180 Align StackAlign =
7181 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7182 State.addLoc(
7183 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
7184 State.AllocateStack(GRLenInBytes, StackAlign),
7185 VA1.getLocVT(), CCValAssign::Full));
7186 State.addLoc(CCValAssign::getMem(
7187 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7188 LocVT2, CCValAssign::Full));
7189 return false;
7190 }
7191 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7192 // The second half can also be passed via register.
7193 State.addLoc(
7194 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7195 } else {
7196 // The second half is passed via the stack, without additional alignment.
7197 State.addLoc(CCValAssign::getMem(
7198 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7199 LocVT2, CCValAssign::Full));
7200 }
7201 return false;
7202}
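// Example: on LA32 (GRLen == 32) an i64 argument split into two i32 halves is
// handled here. If a GPR is free, the low half takes it and the high half
// takes the next GPR or, failing that, a 4-byte stack slot. If no GPR is free
// at all, both halves go to the stack and the first slot is aligned to
// max(4, the original argument alignment).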
7203
7204// Implements the LoongArch calling convention. Returns true upon failure.
7205 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
7206 unsigned ValNo, MVT ValVT,
7207 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7208 CCState &State, bool IsRet, Type *OrigTy) {
7209 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7210 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7211 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7212 MVT LocVT = ValVT;
7213
7214 // Any return value split into more than two values can't be returned
7215 // directly.
7216 if (IsRet && ValNo > 1)
7217 return true;
7218
7219 // If passing a variadic argument, or if no FPR is available.
7220 bool UseGPRForFloat = true;
7221
7222 switch (ABI) {
7223 default:
7224 llvm_unreachable("Unexpected ABI");
7225 break;
7226 case LoongArchABI::ABI_ILP32F:
7227 case LoongArchABI::ABI_LP64F:
7228 case LoongArchABI::ABI_ILP32D:
7229 case LoongArchABI::ABI_LP64D:
7230 UseGPRForFloat = ArgFlags.isVarArg();
7231 break;
7232 case LoongArchABI::ABI_ILP32S:
7233 case LoongArchABI::ABI_LP64S:
7234 break;
7235 }
7236
7237 // If this is a variadic argument, the LoongArch calling convention requires
7238 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7239 // byte alignment. An aligned register should be used regardless of whether
7240 // the original argument was split during legalisation or not. The argument
7241 // will not be passed by registers if the original type is larger than
7242 // 2*GRLen, so the register alignment rule does not apply.
7243 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7244 if (ArgFlags.isVarArg() &&
7245 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7246 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7247 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7248 // Skip 'odd' register if necessary.
7249 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7250 State.AllocateReg(ArgGPRs);
7251 }
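// For instance, on LA64 a variadic argument of 16 bytes with 16-byte
// (2*GRLen) alignment must start in an even-numbered argument register: if
// the next free register were $a3, it is skipped so the value occupies the
// $a4/$a5 pair, matching the layout the callee's va_arg handling expects.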
7252
7253 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7254 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7255 State.getPendingArgFlags();
7256
7257 assert(PendingLocs.size() == PendingArgFlags.size() &&
7258 "PendingLocs and PendingArgFlags out of sync");
7259
7260 // FPR32 and FPR64 alias each other.
7261 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7262 UseGPRForFloat = true;
7263
7264 if (UseGPRForFloat && ValVT == MVT::f32) {
7265 LocVT = GRLenVT;
7266 LocInfo = CCValAssign::BCvt;
7267 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7268 LocVT = MVT::i64;
7269 LocInfo = CCValAssign::BCvt;
7270 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7271 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7272 // registers are exhausted.
7273 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7274 // Depending on available argument GPRS, f64 may be passed in a pair of
7275 // GPRs, split between a GPR and the stack, or passed completely on the
7276 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7277 // cases.
7278 MCRegister Reg = State.AllocateReg(ArgGPRs);
7279 if (!Reg) {
7280 int64_t StackOffset = State.AllocateStack(8, Align(8));
7281 State.addLoc(
7282 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7283 return false;
7284 }
7285 LocVT = MVT::i32;
7286 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7287 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7288 if (HiReg) {
7289 State.addLoc(
7290 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7291 } else {
7292 int64_t StackOffset = State.AllocateStack(4, Align(4));
7293 State.addLoc(
7294 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7295 }
7296 return false;
7297 }
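// Example: passing an f64 on LA32 with the soft-float ABI when only one
// argument GPR remains: the low 32 bits go in that register and the high 32
// bits go to a 4-byte stack slot (the custom-mem location above);
// LowerCall/LowerFormalArguments later reassemble the pair via
// BUILD_PAIR_F64.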
7298
7299 // Split arguments might be passed indirectly, so keep track of the pending
7300 // values.
7301 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7302 LocVT = GRLenVT;
7303 LocInfo = CCValAssign::Indirect;
7304 PendingLocs.push_back(
7305 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7306 PendingArgFlags.push_back(ArgFlags);
7307 if (!ArgFlags.isSplitEnd()) {
7308 return false;
7309 }
7310 }
7311
7312 // If the split argument only had two elements, it should be passed directly
7313 // in registers or on the stack.
7314 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7315 PendingLocs.size() <= 2) {
7316 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7317 // Apply the normal calling convention rules to the first half of the
7318 // split argument.
7319 CCValAssign VA = PendingLocs[0];
7320 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7321 PendingLocs.clear();
7322 PendingArgFlags.clear();
7323 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7324 ArgFlags);
7325 }
7326
7327 // Allocate to a register if possible, or else a stack slot.
7328 Register Reg;
7329 unsigned StoreSizeBytes = GRLen / 8;
7330 Align StackAlign = Align(GRLen / 8);
7331
7332 if (ValVT == MVT::f32 && !UseGPRForFloat)
7333 Reg = State.AllocateReg(ArgFPR32s);
7334 else if (ValVT == MVT::f64 && !UseGPRForFloat)
7335 Reg = State.AllocateReg(ArgFPR64s);
7336 else if (ValVT.is128BitVector())
7337 Reg = State.AllocateReg(ArgVRs);
7338 else if (ValVT.is256BitVector())
7339 Reg = State.AllocateReg(ArgXRs);
7340 else
7341 Reg = State.AllocateReg(ArgGPRs);
7342
7343 unsigned StackOffset =
7344 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7345
7346 // If we reach this point and PendingLocs is non-empty, we must be at the
7347 // end of a split argument that must be passed indirectly.
7348 if (!PendingLocs.empty()) {
7349 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7350 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7351 for (auto &It : PendingLocs) {
7352 if (Reg)
7353 It.convertToReg(Reg);
7354 else
7355 It.convertToMem(StackOffset);
7356 State.addLoc(It);
7357 }
7358 PendingLocs.clear();
7359 PendingArgFlags.clear();
7360 return false;
7361 }
7362 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7363 "Expected an GRLenVT at this stage");
7364
7365 if (Reg) {
7366 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7367 return false;
7368 }
7369
7370 // When a floating-point value is passed on the stack, no bit-cast is needed.
7371 if (ValVT.isFloatingPoint()) {
7372 LocVT = ValVT;
7373 LocInfo = CCValAssign::Full;
7374 }
7375
7376 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7377 return false;
7378}
7379
7380void LoongArchTargetLowering::analyzeInputArgs(
7381 MachineFunction &MF, CCState &CCInfo,
7382 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7383 LoongArchCCAssignFn Fn) const {
7384 FunctionType *FType = MF.getFunction().getFunctionType();
7385 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7386 MVT ArgVT = Ins[i].VT;
7387 Type *ArgTy = nullptr;
7388 if (IsRet)
7389 ArgTy = FType->getReturnType();
7390 else if (Ins[i].isOrigArg())
7391 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7392 LoongArchABI::ABI ABI =
7393 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7394 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7395 CCInfo, IsRet, ArgTy)) {
7396 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7397 << '\n');
7398 llvm_unreachable("");
7399 }
7400 }
7401}
7402
7403void LoongArchTargetLowering::analyzeOutputArgs(
7404 MachineFunction &MF, CCState &CCInfo,
7405 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7406 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7407 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7408 MVT ArgVT = Outs[i].VT;
7409 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7410 LoongArchABI::ABI ABI =
7411 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7412 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7413 CCInfo, IsRet, OrigTy)) {
7414 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7415 << "\n");
7416 llvm_unreachable("");
7417 }
7418 }
7419}
7420
7421// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7422// values.
7423 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
7424 const CCValAssign &VA, const SDLoc &DL) {
7425 switch (VA.getLocInfo()) {
7426 default:
7427 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7428 case CCValAssign::Full:
7430 break;
7431 case CCValAssign::BCvt:
7432 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7433 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7434 else
7435 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7436 break;
7437 }
7438 return Val;
7439}
7440
7441 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
7442 const CCValAssign &VA, const SDLoc &DL,
7443 const ISD::InputArg &In,
7444 const LoongArchTargetLowering &TLI) {
7445 MachineFunction &MF = DAG.getMachineFunction();
7446 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7447 EVT LocVT = VA.getLocVT();
7448 SDValue Val;
7449 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7450 Register VReg = RegInfo.createVirtualRegister(RC);
7451 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7452 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7453
7454 // If input is sign extended from 32 bits, note it for the OptW pass.
7455 if (In.isOrigArg()) {
7456 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7457 if (OrigArg->getType()->isIntegerTy()) {
7458 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7459 // An input zero extended from i31 can also be considered sign extended.
7460 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7461 (BitWidth < 32 && In.Flags.isZExt())) {
7462 LoongArchMachineFunctionInfo *LAFI =
7463 MF.getInfo<LoongArchMachineFunctionInfo>();
7464 LAFI->addSExt32Register(VReg);
7465 }
7466 }
7467 }
7468
7469 return convertLocVTToValVT(DAG, Val, VA, DL);
7470}
7471
7472// The caller is responsible for loading the full value if the argument is
7473// passed with CCValAssign::Indirect.
7474 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
7475 const CCValAssign &VA, const SDLoc &DL) {
7476 MachineFunction &MF = DAG.getMachineFunction();
7477 MachineFrameInfo &MFI = MF.getFrameInfo();
7478 EVT ValVT = VA.getValVT();
7479 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7480 /*IsImmutable=*/true);
7481 SDValue FIN = DAG.getFrameIndex(
7482 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
7483
7484 ISD::LoadExtType ExtType;
7485 switch (VA.getLocInfo()) {
7486 default:
7487 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7488 case CCValAssign::Full:
7489 case CCValAssign::Indirect:
7490 case CCValAssign::BCvt:
7491 ExtType = ISD::NON_EXTLOAD;
7492 break;
7493 }
7494 return DAG.getExtLoad(
7495 ExtType, DL, VA.getLocVT(), Chain, FIN,
7496 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
7497}
7498
7499 static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
7500 const CCValAssign &VA,
7501 const CCValAssign &HiVA,
7502 const SDLoc &DL) {
7503 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7504 "Unexpected VA");
7505 MachineFunction &MF = DAG.getMachineFunction();
7506 MachineFrameInfo &MFI = MF.getFrameInfo();
7507 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7508
7509 assert(VA.isRegLoc() && "Expected register VA assignment");
7510
7511 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7512 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7513 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7514 SDValue Hi;
7515 if (HiVA.isMemLoc()) {
7516 // Second half of f64 is passed on the stack.
7517 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7518 /*IsImmutable=*/true);
7519 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7520 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7521 MachinePointerInfo::getFixedStack(MF, FI));
7522 } else {
7523 // Second half of f64 is passed in another GPR.
7524 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7525 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7526 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7527 }
7528 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7529}
7530
7531 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
7532 const CCValAssign &VA, const SDLoc &DL) {
7533 EVT LocVT = VA.getLocVT();
7534
7535 switch (VA.getLocInfo()) {
7536 default:
7537 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7538 case CCValAssign::Full:
7539 break;
7540 case CCValAssign::BCvt:
7541 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7542 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7543 else
7544 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7545 break;
7546 }
7547 return Val;
7548}
7549
7550static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7551 CCValAssign::LocInfo LocInfo,
7552 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7553 CCState &State) {
7554 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7555 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7556 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7557 static const MCPhysReg GPRList[] = {
7558 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7559 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7560 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7561 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7562 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7563 return false;
7564 }
7565 }
7566
7567 if (LocVT == MVT::f32) {
7568 // Pass in STG registers: F1, F2, F3, F4
7569 // fs0,fs1,fs2,fs3
7570 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7571 LoongArch::F26, LoongArch::F27};
7572 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7573 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7574 return false;
7575 }
7576 }
7577
7578 if (LocVT == MVT::f64) {
7579 // Pass in STG registers: D1, D2, D3, D4
7580 // fs4,fs5,fs6,fs7
7581 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7582 LoongArch::F30_64, LoongArch::F31_64};
7583 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7584 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7585 return false;
7586 }
7587 }
7588
7589 report_fatal_error("No registers left in GHC calling convention");
7590 return true;
7591}
7592
7593// Transform physical registers into virtual registers.
7594 SDValue LoongArchTargetLowering::LowerFormalArguments(
7595 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7596 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7597 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7598
7599 MachineFunction &MF = DAG.getMachineFunction();
7600
7601 switch (CallConv) {
7602 default:
7603 llvm_unreachable("Unsupported calling convention");
7604 case CallingConv::C:
7605 case CallingConv::Fast:
7607 break;
7608 case CallingConv::GHC:
7609 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7610 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7612 "GHC calling convention requires the F and D extensions");
7613 }
7614
7615 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7616 MVT GRLenVT = Subtarget.getGRLenVT();
7617 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7618 // Used with varargs to accumulate store chains.
7619 std::vector<SDValue> OutChains;
7620
7621 // Assign locations to all of the incoming arguments.
7622 SmallVector<CCValAssign> ArgLocs;
7623 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7624
7625 if (CallConv == CallingConv::GHC)
7626 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
7627 else
7628 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7629
7630 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7631 CCValAssign &VA = ArgLocs[i];
7632 SDValue ArgValue;
7633 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7634 // case.
7635 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7636 assert(VA.needsCustom());
7637 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7638 } else if (VA.isRegLoc())
7639 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7640 else
7641 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7642 if (VA.getLocInfo() == CCValAssign::Indirect) {
7643 // If the original argument was split and passed by reference, we need to
7644 // load all parts of it here (using the same address).
7645 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7646 MachinePointerInfo()));
7647 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7648 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7649 assert(ArgPartOffset == 0);
7650 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7651 CCValAssign &PartVA = ArgLocs[i + 1];
7652 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7653 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7654 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7655 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
7656 MachinePointerInfo()));
7657 ++i;
7658 ++InsIdx;
7659 }
7660 continue;
7661 }
7662 InVals.push_back(ArgValue);
7663 }
7664
7665 if (IsVarArg) {
7666 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
7667 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7668 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7669 MachineFrameInfo &MFI = MF.getFrameInfo();
7670 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7671 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7672
7673 // Offset of the first variable argument from stack pointer, and size of
7674 // the vararg save area. For now, the varargs save area is either zero or
7675 // large enough to hold a0-a7.
7676 int VaArgOffset, VarArgsSaveSize;
7677
7678 // If all registers are allocated, then all varargs must be passed on the
7679 // stack and we don't need to save any argregs.
7680 if (ArgRegs.size() == Idx) {
7681 VaArgOffset = CCInfo.getStackSize();
7682 VarArgsSaveSize = 0;
7683 } else {
7684 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7685 VaArgOffset = -VarArgsSaveSize;
7686 }
7687
7688 // Record the frame index of the first variable argument
7689 // which is a value necessary to VASTART.
7690 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7691 LoongArchFI->setVarArgsFrameIndex(FI);
7692
7693 // If saving an odd number of registers then create an extra stack slot to
7694 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7695 // offsets to even-numbered registers remain 2*GRLen-aligned.
7696 if (Idx % 2) {
7697 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7698 true);
7699 VarArgsSaveSize += GRLenInBytes;
7700 }
7701
7702 // Copy the integer registers that may have been used for passing varargs
7703 // to the vararg save area.
7704 for (unsigned I = Idx; I < ArgRegs.size();
7705 ++I, VaArgOffset += GRLenInBytes) {
7706 const Register Reg = RegInfo.createVirtualRegister(RC);
7707 RegInfo.addLiveIn(ArgRegs[I], Reg);
7708 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7709 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7710 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7711 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7712 MachinePointerInfo::getFixedStack(MF, FI));
7713 cast<StoreSDNode>(Store.getNode())
7714 ->getMemOperand()
7715 ->setValue((Value *)nullptr);
7716 OutChains.push_back(Store);
7717 }
7718 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7719 }
7720
7721 // All stores are grouped in one node to allow the matching between
7722 // the size of Ins and InVals. This only happens for vararg functions.
7723 if (!OutChains.empty()) {
7724 OutChains.push_back(Chain);
7725 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7726 }
7727
7728 return Chain;
7729}
7730
7731 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
7732 return CI->isTailCall();
7733}
7734
7735 // Check if the return value is used only as a return value, as otherwise
7736// we can't perform a tail-call.
7737 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
7738 SDValue &Chain) const {
7739 if (N->getNumValues() != 1)
7740 return false;
7741 if (!N->hasNUsesOfValue(1, 0))
7742 return false;
7743
7744 SDNode *Copy = *N->user_begin();
7745 if (Copy->getOpcode() != ISD::CopyToReg)
7746 return false;
7747
7748 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7749 // isn't safe to perform a tail call.
7750 if (Copy->getGluedNode())
7751 return false;
7752
7753 // The copy must be used by a LoongArchISD::RET, and nothing else.
7754 bool HasRet = false;
7755 for (SDNode *Node : Copy->users()) {
7756 if (Node->getOpcode() != LoongArchISD::RET)
7757 return false;
7758 HasRet = true;
7759 }
7760
7761 if (!HasRet)
7762 return false;
7763
7764 Chain = Copy->getOperand(0);
7765 return true;
7766}
7767
7768// Check whether the call is eligible for tail call optimization.
7769bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7770 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7771 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7772
7773 auto CalleeCC = CLI.CallConv;
7774 auto &Outs = CLI.Outs;
7775 auto &Caller = MF.getFunction();
7776 auto CallerCC = Caller.getCallingConv();
7777
7778 // Do not tail call opt if the stack is used to pass parameters.
7779 if (CCInfo.getStackSize() != 0)
7780 return false;
7781
7782 // Do not tail call opt if any parameters need to be passed indirectly.
7783 for (auto &VA : ArgLocs)
7784 if (VA.getLocInfo() == CCValAssign::Indirect)
7785 return false;
7786
7787 // Do not tail call opt if either caller or callee uses struct return
7788 // semantics.
7789 auto IsCallerStructRet = Caller.hasStructRetAttr();
7790 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7791 if (IsCallerStructRet || IsCalleeStructRet)
7792 return false;
7793
7794 // Do not tail call opt if either the callee or caller has a byval argument.
7795 for (auto &Arg : Outs)
7796 if (Arg.Flags.isByVal())
7797 return false;
7798
7799 // The callee has to preserve all registers the caller needs to preserve.
7800 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7801 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7802 if (CalleeCC != CallerCC) {
7803 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7804 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7805 return false;
7806 }
7807 return true;
7808}
7809
7810 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7811 return DAG.getDataLayout().getPrefTypeAlign(
7812 VT.getTypeForEVT(*DAG.getContext()));
7813}
7814
7815// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7816// and output parameter nodes.
7817SDValue
7818 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
7819 SmallVectorImpl<SDValue> &InVals) const {
7820 SelectionDAG &DAG = CLI.DAG;
7821 SDLoc &DL = CLI.DL;
7822 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
7823 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7824 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
7825 SDValue Chain = CLI.Chain;
7826 SDValue Callee = CLI.Callee;
7827 CallingConv::ID CallConv = CLI.CallConv;
7828 bool IsVarArg = CLI.IsVarArg;
7829 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7830 MVT GRLenVT = Subtarget.getGRLenVT();
7831 bool &IsTailCall = CLI.IsTailCall;
7832
7833 MachineFunction &MF = DAG.getMachineFunction();
7834
7835 // Analyze the operands of the call, assigning locations to each operand.
7836 SmallVector<CCValAssign> ArgLocs;
7837 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7838
7839 if (CallConv == CallingConv::GHC)
7840 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
7841 else
7842 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
7843
7844 // Check if it's really possible to do a tail call.
7845 if (IsTailCall)
7846 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7847
7848 if (IsTailCall)
7849 ++NumTailCalls;
7850 else if (CLI.CB && CLI.CB->isMustTailCall())
7851 report_fatal_error("failed to perform tail call elimination on a call "
7852 "site marked musttail");
7853
7854 // Get a count of how many bytes are to be pushed on the stack.
7855 unsigned NumBytes = ArgCCInfo.getStackSize();
7856
7857 // Create local copies for byval args.
7858 SmallVector<SDValue> ByValArgs;
7859 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7860 ISD::ArgFlagsTy Flags = Outs[i].Flags;
7861 if (!Flags.isByVal())
7862 continue;
7863
7864 SDValue Arg = OutVals[i];
7865 unsigned Size = Flags.getByValSize();
7866 Align Alignment = Flags.getNonZeroByValAlign();
7867
7868 int FI =
7869 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7870 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7871 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
7872
7873 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7874 /*IsVolatile=*/false,
7875 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
7876 MachinePointerInfo(), MachinePointerInfo());
7877 ByValArgs.push_back(FIPtr);
7878 }
7879
7880 if (!IsTailCall)
7881 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7882
7883 // Copy argument values to their designated locations.
7884 SmallVector<std::pair<Register, SDValue>> RegsToPass;
7885 SmallVector<SDValue> MemOpChains;
7886 SDValue StackPtr;
7887 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
7888 ++i, ++OutIdx) {
7889 CCValAssign &VA = ArgLocs[i];
7890 SDValue ArgValue = OutVals[OutIdx];
7891 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
7892
7893 // Handle passing f64 on LA32D with a soft float ABI as a special case.
7894 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7895 assert(VA.isRegLoc() && "Expected register VA assignment");
7896 assert(VA.needsCustom());
7897 SDValue SplitF64 =
7899 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
7900 SDValue Lo = SplitF64.getValue(0);
7901 SDValue Hi = SplitF64.getValue(1);
7902
7903 Register RegLo = VA.getLocReg();
7904 RegsToPass.push_back(std::make_pair(RegLo, Lo));
7905
7906 // Get the CCValAssign for the Hi part.
7907 CCValAssign &HiVA = ArgLocs[++i];
7908
7909 if (HiVA.isMemLoc()) {
7910 // Second half of f64 is passed on the stack.
7911 if (!StackPtr.getNode())
7912 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7913 SDValue Address =
7914 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7915 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
7916 // Emit the store.
7917 MemOpChains.push_back(DAG.getStore(
7918 Chain, DL, Hi, Address,
7919 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
7920 } else {
7921 // Second half of f64 is passed in another GPR.
7922 Register RegHigh = HiVA.getLocReg();
7923 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
7924 }
7925 continue;
7926 }
7927
7928 // Promote the value if needed.
7929 // For now, only handle fully promoted and indirect arguments.
7930 if (VA.getLocInfo() == CCValAssign::Indirect) {
7931 // Store the argument in a stack slot and pass its address.
7932 Align StackAlign =
7933 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
7934 getPrefTypeAlign(ArgValue.getValueType(), DAG));
7935 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
7936 // If the original argument was split and passed by reference, we need to
7937 // store the required parts of it here (and pass just one address).
7938 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
7939 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
7940 assert(ArgPartOffset == 0);
7941 // Calculate the total size to store. We don't have access to what we're
7942 // actually storing other than performing the loop and collecting the
7943 // info.
7944 SmallVector<std::pair<SDValue, SDValue>> Parts;
7945 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
7946 SDValue PartValue = OutVals[OutIdx + 1];
7947 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
7948 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7949 EVT PartVT = PartValue.getValueType();
7950
7951 StoredSize += PartVT.getStoreSize();
7952 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
7953 Parts.push_back(std::make_pair(PartValue, Offset));
7954 ++i;
7955 ++OutIdx;
7956 }
7957 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
7958 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
7959 MemOpChains.push_back(
7960 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
7961 MachinePointerInfo::getFixedStack(MF, FI)));
7962 for (const auto &Part : Parts) {
7963 SDValue PartValue = Part.first;
7964 SDValue PartOffset = Part.second;
7965 SDValue Address =
7966 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
7967 MemOpChains.push_back(
7968 DAG.getStore(Chain, DL, PartValue, Address,
7969 MachinePointerInfo::getFixedStack(MF, FI)));
7970 }
7971 ArgValue = SpillSlot;
7972 } else {
7973 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
7974 }
7975
7976 // Use local copy if it is a byval arg.
7977 if (Flags.isByVal())
7978 ArgValue = ByValArgs[j++];
7979
7980 if (VA.isRegLoc()) {
7981 // Queue up the argument copies and emit them at the end.
7982 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
7983 } else {
7984 assert(VA.isMemLoc() && "Argument not register or memory");
7985 assert(!IsTailCall && "Tail call not allowed if stack is used "
7986 "for passing parameters");
7987
7988 // Work out the address of the stack slot.
7989 if (!StackPtr.getNode())
7990 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7991 SDValue Address =
7992 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7993 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
7994
7995 // Emit the store.
7996 MemOpChains.push_back(
7997 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
7998 }
7999 }
8000
8001 // Join the stores, which are independent of one another.
8002 if (!MemOpChains.empty())
8003 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8004
8005 SDValue Glue;
8006
8007 // Build a sequence of copy-to-reg nodes, chained and glued together.
8008 for (auto &Reg : RegsToPass) {
8009 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8010 Glue = Chain.getValue(1);
8011 }
8012
8013 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8014 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8015 // split it and then direct call can be matched by PseudoCALL.
8016 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
8017 const GlobalValue *GV = S->getGlobal();
8018 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8019 ? LoongArchII::MO_CALL
8020 : LoongArchII::MO_CALL_PLT;
8021 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8022 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8023 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8024 ? LoongArchII::MO_CALL
8025 : LoongArchII::MO_CALL_PLT;
8026 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8027 }
8028
8029 // The first call operand is the chain and the second is the target address.
8030 SmallVector<SDValue> Ops;
8031 Ops.push_back(Chain);
8032 Ops.push_back(Callee);
8033
8034 // Add argument registers to the end of the list so that they are
8035 // known live into the call.
8036 for (auto &Reg : RegsToPass)
8037 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8038
8039 if (!IsTailCall) {
8040 // Add a register mask operand representing the call-preserved registers.
8041 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8042 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8043 assert(Mask && "Missing call preserved mask for calling convention");
8044 Ops.push_back(DAG.getRegisterMask(Mask));
8045 }
8046
8047 // Glue the call to the argument copies, if any.
8048 if (Glue.getNode())
8049 Ops.push_back(Glue);
8050
8051 // Emit the call.
8052 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8053 unsigned Op;
8054 switch (DAG.getTarget().getCodeModel()) {
8055 default:
8056 report_fatal_error("Unsupported code model");
8057 case CodeModel::Small:
8058 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8059 break;
8060 case CodeModel::Medium:
8061 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8062 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8063 break;
8064 case CodeModel::Large:
8065 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8066 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8067 break;
8068 }
8069
8070 if (IsTailCall) {
8071 MF.getFrameInfo().setHasTailCall();
8072 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8073 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8074 return Ret;
8075 }
8076
8077 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8078 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8079 Glue = Chain.getValue(1);
8080
8081 // Mark the end of the call, which is glued to the call itself.
8082 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8083 Glue = Chain.getValue(1);
8084
8085 // Assign locations to each value returned by this call.
8086 SmallVector<CCValAssign, 16> RVLocs;
8087 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8088 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8089
8090 // Copy all of the result registers out of their specified physreg.
8091 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8092 auto &VA = RVLocs[i];
8093 // Copy the value out.
8094 SDValue RetValue =
8095 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8096 // Glue the RetValue to the end of the call sequence.
8097 Chain = RetValue.getValue(1);
8098 Glue = RetValue.getValue(2);
8099
8100 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8101 assert(VA.needsCustom());
8102 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8103 MVT::i32, Glue);
8104 Chain = RetValue2.getValue(1);
8105 Glue = RetValue2.getValue(2);
8106 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8107 RetValue, RetValue2);
8108 } else
8109 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8110
8111 InVals.push_back(RetValue);
8112 }
8113
8114 return Chain;
8115}
8116
8117 bool LoongArchTargetLowering::CanLowerReturn(
8118 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8119 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8120 const Type *RetTy) const {
8121 SmallVector<CCValAssign, 16> RVLocs;
8122 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8123
8124 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8125 LoongArchABI::ABI ABI =
8126 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8127 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8128 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8129 return false;
8130 }
8131 return true;
8132}
8133
8134 SDValue LoongArchTargetLowering::LowerReturn(
8135 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8136 const SmallVectorImpl<ISD::OutputArg> &Outs,
8137 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8138 SelectionDAG &DAG) const {
8139 // Stores the assignment of the return value to a location.
8140 SmallVector<CCValAssign, 16> RVLocs;
8141
8142 // Info about the registers and stack slot.
8143 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8144 *DAG.getContext());
8145
8146 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8147 nullptr, CC_LoongArch);
8148 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8149 report_fatal_error("GHC functions return void only");
8150 SDValue Glue;
8151 SmallVector<SDValue, 4> RetOps(1, Chain);
8152
8153 // Copy the result values into the output registers.
8154 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8155 SDValue Val = OutVals[OutIdx];
8156 CCValAssign &VA = RVLocs[i];
8157 assert(VA.isRegLoc() && "Can only return in registers!");
8158
8159 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8160 // Handle returning f64 on LA32D with a soft float ABI.
8161 assert(VA.isRegLoc() && "Expected return via registers");
8162 assert(VA.needsCustom());
8163 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8164 DAG.getVTList(MVT::i32, MVT::i32), Val);
8165 SDValue Lo = SplitF64.getValue(0);
8166 SDValue Hi = SplitF64.getValue(1);
8167 Register RegLo = VA.getLocReg();
8168 Register RegHi = RVLocs[++i].getLocReg();
8169
8170 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8171 Glue = Chain.getValue(1);
8172 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8173 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8174 Glue = Chain.getValue(1);
8175 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8176 } else {
8177 // Handle a 'normal' return.
8178 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8179 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8180
8181 // Guarantee that all emitted copies are stuck together.
8182 Glue = Chain.getValue(1);
8183 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8184 }
8185 }
8186
8187 RetOps[0] = Chain; // Update chain.
8188
8189 // Add the glue node if we have it.
8190 if (Glue.getNode())
8191 RetOps.push_back(Glue);
8192
8193 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8194}
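// Illustration (a standalone sketch, not backend code): what SPLIT_PAIR_F64
// models for the LA32 soft-float return path above. The f64 payload leaves the
// function as two i32 halves, with the low word in the first return register
// and the high word in the next one.
#include <cstdint>
#include <cstring>

static void splitF64(double D, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits)); // bitcast of the f64 payload
  Lo = uint32_t(Bits);                  // copied to RegLo
  Hi = uint32_t(Bits >> 32);            // copied to RegHi
}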
8195
8196 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
8197 EVT VT) const {
8198 if (!Subtarget.hasExtLSX())
8199 return false;
8200
8201 if (VT == MVT::f32) {
8202 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8203 return (masked == 0x3e000000 || masked == 0x40000000);
8204 }
8205
8206 if (VT == MVT::f64) {
8207 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8208 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8209 }
8210
8211 return false;
8212}
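// Illustration (a standalone sketch, not backend code): the f32 test above can
// be reproduced on host bit patterns; it applies the same mask as the code
// above and compares the remainder against the two accepted patterns.
#include <cstdint>
#include <cstring>

static bool f32PassesVLDIMask(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  uint32_t Masked = Bits & 0x7e07ffff; // same mask as isFPImmVLDILegal
  return Masked == 0x3e000000 || Masked == 0x40000000;
}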
8213
8214bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8215 bool ForCodeSize) const {
8216 // TODO: Maybe need more checks here after vector extension is supported.
8217 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8218 return false;
8219 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8220 return false;
8221 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8222}
8223
8225 return true;
8226}
8227
8229 return true;
8230}
8231
8232bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8233 const Instruction *I) const {
8234 if (!Subtarget.is64Bit())
8235 return isa<LoadInst>(I) || isa<StoreInst>(I);
8236
8237 if (isa<LoadInst>(I))
8238 return true;
8239
8240 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8241 // require fences because we can use amswap_db.[w/d].
8242 Type *Ty = I->getOperand(0)->getType();
8243 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8244 unsigned Size = Ty->getIntegerBitWidth();
8245 return (Size == 8 || Size == 16);
8246 }
8247
8248 return false;
8249}
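// Illustration (sketch): the rule above in user-code terms. On LA64 a 32- or
// 64-bit atomic store can be implemented with amswap_db.[w/d] and needs no
// surrounding fences, while 8/16-bit atomic stores (and all sizes on LA32)
// have fences inserted around a plain store.
#include <atomic>
#include <cstdint>

static void atomicStores(std::atomic<int32_t> &Word, std::atomic<int16_t> &Half) {
  Word.store(1, std::memory_order_seq_cst); // no fences needed on LA64
  Half.store(1, std::memory_order_seq_cst); // fenced per the hook above
}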
8250
8251 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
8252 LLVMContext &Context,
8253 EVT VT) const {
8254 if (!VT.isVector())
8255 return getPointerTy(DL);
8256 return VT.changeVectorElementTypeToInteger();
8257}
8258
8260 // TODO: Support vectors.
8261 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
8262}
8263
8264 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
8265 const CallInst &I,
8266 MachineFunction &MF,
8267 unsigned Intrinsic) const {
8268 switch (Intrinsic) {
8269 default:
8270 return false;
8271 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8272 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8273 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8274 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8275 Info.opc = ISD::INTRINSIC_W_CHAIN;
8276 Info.memVT = MVT::i32;
8277 Info.ptrVal = I.getArgOperand(0);
8278 Info.offset = 0;
8279 Info.align = Align(4);
8280 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
8281 MachineMemOperand::MOVolatile;
8282 return true;
8283 // TODO: Add more Intrinsics later.
8284 }
8285}
8286
8287// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
8288// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
8289// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
8290// regression, we need to implement it manually.
8291 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
8292 AtomicRMWInst::BinOp Op = AI->getOperation();
8293
8294 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
8295 Op == AtomicRMWInst::And) &&
8296 "Unable to expand");
8297 unsigned MinWordSize = 4;
8298
8299 IRBuilder<> Builder(AI);
8300 LLVMContext &Ctx = Builder.getContext();
8301 const DataLayout &DL = AI->getDataLayout();
8302 Type *ValueType = AI->getType();
8303 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8304
8305 Value *Addr = AI->getPointerOperand();
8306 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8307 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8308
8309 Value *AlignedAddr = Builder.CreateIntrinsic(
8310 Intrinsic::ptrmask, {PtrTy, IntTy},
8311 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8312 "AlignedAddr");
8313
8314 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8315 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8316 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8317 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8318 Value *Mask = Builder.CreateShl(
8319 ConstantInt::get(WordType,
8320 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8321 ShiftAmt, "Mask");
8322 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8323 Value *ValOperand_Shifted =
8324 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8325 ShiftAmt, "ValOperand_Shifted");
8326 Value *NewOperand;
8327 if (Op == AtomicRMWInst::And)
8328 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8329 else
8330 NewOperand = ValOperand_Shifted;
8331
8332 AtomicRMWInst *NewAI =
8333 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8334 AI->getOrdering(), AI->getSyncScopeID());
8335
8336 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8337 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8338 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8339 AI->replaceAllUsesWith(FinalOldResult);
8340 AI->eraseFromParent();
8341}
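// Illustration (a standalone C++20 sketch of the same word-masking idea, not
// the IR the function above emits): a sub-word atomic OR performed through the
// containing aligned 32-bit word, assuming that word may be accessed atomically.
#include <atomic>
#include <cstdint>

static uint8_t atomicOrByte(uint8_t *Addr, uint8_t Val) {
  uintptr_t A = reinterpret_cast<uintptr_t>(Addr);
  auto *Aligned = reinterpret_cast<uint32_t *>(A & ~uintptr_t(3)); // AlignedAddr
  unsigned ShiftAmt = unsigned(A & 3) * 8;       // byte offset -> bit offset
  uint32_t Shifted = uint32_t(Val) << ShiftAmt;  // ValOperand_Shifted
  std::atomic_ref<uint32_t> Word(*Aligned);
  uint32_t Old = Word.fetch_or(Shifted);         // 32-bit atomicrmw or
  return uint8_t(Old >> ShiftAmt);               // extract the old byte
}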
8342
8343 TargetLowering::AtomicExpansionKind
8344 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
8345 // TODO: Add more AtomicRMWInst that need to be extended.
8346
8347 // Since floating-point operations require a non-trivial set of data
8348 // operations, use CmpXChg to expand.
8349 if (AI->isFloatingPointOperation() ||
8350 AI->getOperation() == AtomicRMWInst::UIncWrap ||
8351 AI->getOperation() == AtomicRMWInst::UDecWrap ||
8352 AI->getOperation() == AtomicRMWInst::USubCond ||
8353 AI->getOperation() == AtomicRMWInst::USubSat)
8354 return AtomicExpansionKind::CmpXChg;
8355
8356 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8357 (AI->getOperation() == AtomicRMWInst::Xchg ||
8358 AI->getOperation() == AtomicRMWInst::Add ||
8359 AI->getOperation() == AtomicRMWInst::Sub)) {
8360 return AtomicExpansionKind::None;
8361 }
8362
8363 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8364 if (Subtarget.hasLAMCAS()) {
8365 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8366 AI->getOperation() == AtomicRMWInst::Or ||
8367 AI->getOperation() == AtomicRMWInst::Xor))
8368 return AtomicExpansionKind::Expand;
8369 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8370 return AtomicExpansionKind::CmpXChg;
8371 }
8372
8373 if (Size == 8 || Size == 16)
8374 return AtomicExpansionKind::MaskedIntrinsic;
8375 return AtomicExpansionKind::None;
8376}
8377
8378static Intrinsic::ID
8380 AtomicRMWInst::BinOp BinOp) {
8381 if (GRLen == 64) {
8382 switch (BinOp) {
8383 default:
8384 llvm_unreachable("Unexpected AtomicRMW BinOp");
8385 case AtomicRMWInst::Xchg:
8386 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8387 case AtomicRMWInst::Add:
8388 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8389 case AtomicRMWInst::Sub:
8390 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8391 case AtomicRMWInst::Nand:
8392 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8393 case AtomicRMWInst::UMax:
8394 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8395 case AtomicRMWInst::UMin:
8396 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8397 case AtomicRMWInst::Max:
8398 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8399 case AtomicRMWInst::Min:
8400 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8401 // TODO: support other AtomicRMWInst.
8402 }
8403 }
8404
8405 if (GRLen == 32) {
8406 switch (BinOp) {
8407 default:
8408 llvm_unreachable("Unexpected AtomicRMW BinOp");
8409 case AtomicRMWInst::Xchg:
8410 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8411 case AtomicRMWInst::Add:
8412 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8413 case AtomicRMWInst::Sub:
8414 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8415 case AtomicRMWInst::Nand:
8416 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8417 case AtomicRMWInst::UMax:
8418 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8419 case AtomicRMWInst::UMin:
8420 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8421 case AtomicRMWInst::Max:
8422 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8423 case AtomicRMWInst::Min:
8424 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8425 // TODO: support other AtomicRMWInst.
8426 }
8427 }
8428
8429 llvm_unreachable("Unexpected GRLen\n");
8430}
8431
8432 TargetLowering::AtomicExpansionKind
8433 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
8434 AtomicCmpXchgInst *CI) const {
8435
8436 if (Subtarget.hasLAMCAS())
8437 return AtomicExpansionKind::None;
8438
8439 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
8440 if (Size == 8 || Size == 16)
8441 return AtomicExpansionKind::MaskedIntrinsic;
8442 return AtomicExpansionKind::None;
8443}
8444
8445 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
8446 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8447 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8448 unsigned GRLen = Subtarget.getGRLen();
8449 AtomicOrdering FailOrd = CI->getFailureOrdering();
8450 Value *FailureOrdering =
8451 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8452 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8453 if (GRLen == 64) {
8454 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8455 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8456 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8457 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8458 }
8459 Type *Tys[] = {AlignedAddr->getType()};
8460 Value *Result = Builder.CreateIntrinsic(
8461 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8462 if (GRLen == 64)
8463 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8464 return Result;
8465}
8466
8467 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
8468 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8469 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8470 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8471 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8472 // mask, as this produces better code than the LL/SC loop emitted by
8473 // int_loongarch_masked_atomicrmw_xchg.
8474 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8475 isa<ConstantInt>(AI->getValOperand())) {
8476 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
8477 if (CVal->isZero())
8478 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8479 Builder.CreateNot(Mask, "Inv_Mask"),
8480 AI->getAlign(), Ord);
8481 if (CVal->isMinusOne())
8482 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8483 AI->getAlign(), Ord);
8484 }
8485
8486 unsigned GRLen = Subtarget.getGRLen();
8487 Value *Ordering =
8488 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8489 Type *Tys[] = {AlignedAddr->getType()};
8490 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
8491 AI->getModule(),
8492 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
8493
8494 if (GRLen == 64) {
8495 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8496 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8497 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8498 }
8499
8500 Value *Result;
8501
8502 // Must pass the shift amount needed to sign extend the loaded value prior
8503 // to performing a signed comparison for min/max. ShiftAmt is the number of
8504 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8505 // is the number of bits to left+right shift the value in order to
8506 // sign-extend.
8507 if (AI->getOperation() == AtomicRMWInst::Min ||
8508 AI->getOperation() == AtomicRMWInst::Max) {
8509 const DataLayout &DL = AI->getDataLayout();
8510 unsigned ValWidth =
8511 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8512 Value *SextShamt =
8513 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8514 Result = Builder.CreateCall(LlwOpScwLoop,
8515 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8516 } else {
8517 Result =
8518 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8519 }
8520
8521 if (GRLen == 64)
8522 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8523 return Result;
8524}
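// Worked example of the SextShamt computation above (sketch): with GRLen = 64,
// an i8 field at byte offset 1 has ShiftAmt = 8 and ValWidth = 8, so
// SextShamt = 64 - 8 - 8 = 48. Shifting the loaded word left by SextShamt and
// arithmetically right by SextShamt sign-extends the field in place; a further
// right shift by ShiftAmt then yields the sign-extended value.
#include <cstdint>

static int64_t extractSignedField(uint64_t Word, unsigned ShiftAmt,
                                  unsigned ValWidth, unsigned GRLen = 64) {
  unsigned SextShamt = GRLen - ShiftAmt - ValWidth; // 48 in the example
  return int64_t(Word << SextShamt) >> (SextShamt + ShiftAmt);
}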
8525
8526 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
8527 const MachineFunction &MF, EVT VT) const {
8528 VT = VT.getScalarType();
8529
8530 if (!VT.isSimple())
8531 return false;
8532
8533 switch (VT.getSimpleVT().SimpleTy) {
8534 case MVT::f32:
8535 case MVT::f64:
8536 return true;
8537 default:
8538 break;
8539 }
8540
8541 return false;
8542}
8543
8544 Register LoongArchTargetLowering::getExceptionPointerRegister(
8545 const Constant *PersonalityFn) const {
8546 return LoongArch::R4;
8547}
8548
8549 Register LoongArchTargetLowering::getExceptionSelectorRegister(
8550 const Constant *PersonalityFn) const {
8551 return LoongArch::R5;
8552}
8553
8554//===----------------------------------------------------------------------===//
8555// Target Optimization Hooks
8556//===----------------------------------------------------------------------===//
8557
8558 static int getEstimateRefinementSteps(EVT VT,
8559 const LoongArchSubtarget &Subtarget) {
8560 // The FRECIPE family of instructions provides a relative accuracy of 2^-14.
8561 // An IEEE float has 23 significand bits and a double has 52.
8562 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8563 return RefinementSteps;
8564}
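// A back-of-the-envelope check of the step counts above (sketch): the estimate
// starts with about 14 correct bits and each Newton-Raphson iteration roughly
// doubles that, so one step covers the 24-bit f32 significand and two steps
// cover the 53-bit f64 significand.
static_assert(14 * (1 << 1) >= 24, "one refinement step suffices for f32");
static_assert(14 * (1 << 2) >= 53, "two refinement steps suffice for f64");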
8565
8566 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
8567 SelectionDAG &DAG, int Enabled,
8568 int &RefinementSteps,
8569 bool &UseOneConstNR,
8570 bool Reciprocal) const {
8571 if (Subtarget.hasFrecipe()) {
8572 SDLoc DL(Operand);
8573 EVT VT = Operand.getValueType();
8574
8575 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8576 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8577 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8578 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8579 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8580
8581 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8582 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8583
8584 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8585 if (Reciprocal)
8586 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8587
8588 return Estimate;
8589 }
8590 }
8591
8592 return SDValue();
8593}
8594
8595 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
8596 SelectionDAG &DAG,
8597 int Enabled,
8598 int &RefinementSteps) const {
8599 if (Subtarget.hasFrecipe()) {
8600 SDLoc DL(Operand);
8601 EVT VT = Operand.getValueType();
8602
8603 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8604 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8605 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8606 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8607 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8608
8609 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8610 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8611
8612 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8613 }
8614 }
8615
8616 return SDValue();
8617}
8618
8619//===----------------------------------------------------------------------===//
8620// LoongArch Inline Assembly Support
8621//===----------------------------------------------------------------------===//
8622
8623 LoongArchTargetLowering::ConstraintType
8624 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8625 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8626 //
8627 // 'f': A floating-point register (if available).
8628 // 'k': A memory operand whose address is formed by a base register and
8629 // (optionally scaled) index register.
8630 // 'l': A signed 16-bit constant.
8631 // 'm': A memory operand whose address is formed by a base register and
8632 // offset that is suitable for use in instructions with the same
8633 // addressing mode as st.w and ld.w.
8634 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8635 // instruction)
8636 // 'I': A signed 12-bit constant (for arithmetic instructions).
8637 // 'J': Integer zero.
8638 // 'K': An unsigned 12-bit constant (for logic instructions).
8639 // "ZB": An address that is held in a general-purpose register. The offset is
8640 // zero.
8641 // "ZC": A memory operand whose address is formed by a base register and
8642 // offset that is suitable for use in instructions with the same
8643 // addressing mode as ll.w and sc.w.
8644 if (Constraint.size() == 1) {
8645 switch (Constraint[0]) {
8646 default:
8647 break;
8648 case 'f':
8649 case 'q':
8650 return C_RegisterClass;
8651 case 'l':
8652 case 'I':
8653 case 'J':
8654 case 'K':
8655 return C_Immediate;
8656 case 'k':
8657 return C_Memory;
8658 }
8659 }
8660
8661 if (Constraint == "ZC" || Constraint == "ZB")
8662 return C_Memory;
8663
8664 // 'm' is handled here.
8665 return TargetLowering::getConstraintType(Constraint);
8666}
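// Illustration (sketch, not part of the backend): how a couple of the
// constraints documented above look in user code. The mnemonics are only for
// illustration.
static long addImm(long X) {
  long R;
  asm("addi.d %0, %1, %2" : "=r"(R) : "r"(X), "I"(42)); // 'I': signed 12-bit imm
  return R;
}

static double negate(double X) {
  double R;
  asm("fneg.d %0, %1" : "=f"(R) : "f"(X)); // 'f': floating-point register
  return R;
}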
8667
8668InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8669 StringRef ConstraintCode) const {
8670 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8671 .Case("k", InlineAsm::ConstraintCode::k)
8672 .Case("ZB", InlineAsm::ConstraintCode::ZB)
8673 .Case("ZC", InlineAsm::ConstraintCode::ZC)
8674 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8675}
8676
8677std::pair<unsigned, const TargetRegisterClass *>
8678LoongArchTargetLowering::getRegForInlineAsmConstraint(
8679 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8680 // First, see if this is a constraint that directly corresponds to a LoongArch
8681 // register class.
8682 if (Constraint.size() == 1) {
8683 switch (Constraint[0]) {
8684 case 'r':
8685 // TODO: Support fixed vectors up to GRLen?
8686 if (VT.isVector())
8687 break;
8688 return std::make_pair(0U, &LoongArch::GPRRegClass);
8689 case 'q':
8690 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8691 case 'f':
8692 if (Subtarget.hasBasicF() && VT == MVT::f32)
8693 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8694 if (Subtarget.hasBasicD() && VT == MVT::f64)
8695 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8696 if (Subtarget.hasExtLSX() &&
8697 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8698 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8699 if (Subtarget.hasExtLASX() &&
8700 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8701 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8702 break;
8703 default:
8704 break;
8705 }
8706 }
8707
8708 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8709 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8710 // constraints while the official register name is prefixed with a '$'. So we
8711 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8712 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
8713 // case insensitive, so no need to convert the constraint to upper case here.
8714 //
8715 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8716 // decode the usage of register name aliases into their official names. And
8717 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8718 // official register names.
8719 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8720 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8721 bool IsFP = Constraint[2] == 'f';
8722 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8723 std::pair<unsigned, const TargetRegisterClass *> R;
8724 R = TargetLowering::getRegForInlineAsmConstraint(
8725 TRI, join_items("", Temp.first, Temp.second), VT);
8726 // Match those names to the widest floating point register type available.
8727 if (IsFP) {
8728 unsigned RegNo = R.first;
8729 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8730 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8731 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8732 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8733 }
8734 }
8735 }
8736 return R;
8737 }
8738
8739 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8740}
8741
8742void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8743 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8744 SelectionDAG &DAG) const {
8745 // Currently only support length 1 constraints.
8746 if (Constraint.size() == 1) {
8747 switch (Constraint[0]) {
8748 case 'l':
8749 // Validate & create a 16-bit signed immediate operand.
8750 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8751 uint64_t CVal = C->getSExtValue();
8752 if (isInt<16>(CVal))
8753 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8754 Subtarget.getGRLenVT()));
8755 }
8756 return;
8757 case 'I':
8758 // Validate & create a 12-bit signed immediate operand.
8759 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8760 uint64_t CVal = C->getSExtValue();
8761 if (isInt<12>(CVal))
8762 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8763 Subtarget.getGRLenVT()));
8764 }
8765 return;
8766 case 'J':
8767 // Validate & create an integer zero operand.
8768 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8769 if (C->getZExtValue() == 0)
8770 Ops.push_back(
8771 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8772 return;
8773 case 'K':
8774 // Validate & create a 12-bit unsigned immediate operand.
8775 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8776 uint64_t CVal = C->getZExtValue();
8777 if (isUInt<12>(CVal))
8778 Ops.push_back(
8779 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8780 }
8781 return;
8782 default:
8783 break;
8784 }
8785 }
8786 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8787}
8788
8789#define GET_REGISTER_MATCHER
8790#include "LoongArchGenAsmMatcher.inc"
8791
8792 Register
8793 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8794 const MachineFunction &MF) const {
8795 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8796 std::string NewRegName = Name.second.str();
8797 Register Reg = MatchRegisterAltName(NewRegName);
8798 if (!Reg)
8799 Reg = MatchRegisterName(NewRegName);
8800 if (!Reg)
8801 return Reg;
8802 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8803 if (!ReservedRegs.test(Reg))
8804 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8805 StringRef(RegName) + "\"."));
8806 return Reg;
8807}
8808
8809 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8810 EVT VT, SDValue C) const {
8811 // TODO: Support vectors.
8812 if (!VT.isScalarInteger())
8813 return false;
8814
8815 // Omit the optimization if the data size exceeds GRLen.
8816 if (VT.getSizeInBits() > Subtarget.getGRLen())
8817 return false;
8818
8819 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8820 const APInt &Imm = ConstNode->getAPIntValue();
8821 // Break MUL into (SLLI + ADD/SUB) or ALSL.
8822 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8823 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8824 return true;
8825 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8826 if (ConstNode->hasOneUse() &&
8827 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8828 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8829 return true;
8830 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8831 // in which the immediate has two set bits. Or break (MUL x, imm)
8832 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8833 // equals (1 << s0) - (1 << s1).
8834 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8835 unsigned Shifts = Imm.countr_zero();
8836 // Reject immediates which can be composed via a single LUI.
8837 if (Shifts >= 12)
8838 return false;
8839 // Reject multiplications that can be optimized to
8840 // (SLLI (ALSL x, x, 1/2/3/4), s).
8841 APInt ImmPop = Imm.ashr(Shifts);
8842 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8843 return false;
8844 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8845 // since it needs one more instruction than the other three cases.
8846 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8847 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8848 (ImmSmall - Imm).isPowerOf2())
8849 return true;
8850 }
8851 }
8852
8853 return false;
8854}
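// Worked instances of the decompositions above (sketch): a multiply by 9 can
// become a single alsl.d ((x << 3) + x) because Imm - 1 is a power of two, and
// a multiply by 15 becomes slli.d plus sub.d ((x << 4) - x) because Imm + 1 is
// a power of two; both are cheaper than a general mul.d.
static_assert(9 == (1 << 3) + 1, "Imm - 1 is a power of two");
static_assert(15 == (1 << 4) - 1, "Imm + 1 is a power of two");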
8855
8856 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8857 const AddrMode &AM,
8858 Type *Ty, unsigned AS,
8859 Instruction *I) const {
8860 // LoongArch has four basic addressing modes:
8861 // 1. reg
8862 // 2. reg + 12-bit signed offset
8863 // 3. reg + 14-bit signed offset left-shifted by 2
8864 // 4. reg1 + reg2
8865 // TODO: Add more checks after the vector extension is supported.
8866
8867 // No global is ever allowed as a base.
8868 if (AM.BaseGV)
8869 return false;
8870
8871 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8872 // with `UAL` feature.
8873 if (!isInt<12>(AM.BaseOffs) &&
8874 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8875 return false;
8876
8877 switch (AM.Scale) {
8878 case 0:
8879 // "r+i" or just "i", depending on HasBaseReg.
8880 break;
8881 case 1:
8882 // "r+r+i" is not allowed.
8883 if (AM.HasBaseReg && AM.BaseOffs)
8884 return false;
8885 // Otherwise we have "r+r" or "r+i".
8886 break;
8887 case 2:
8888 // "2*r+r" or "2*r+i" is not allowed.
8889 if (AM.HasBaseReg || AM.BaseOffs)
8890 return false;
8891 // Allow "2*r" as "r+r".
8892 break;
8893 default:
8894 return false;
8895 }
8896
8897 return true;
8898}
8899
8901 return isInt<12>(Imm);
8902}
8903
8905 return isInt<12>(Imm);
8906}
8907
8908 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
8909 // Zexts are free if they can be combined with a load.
8910 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8911 // poorly with type legalization of compares preferring sext.
8912 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8913 EVT MemVT = LD->getMemoryVT();
8914 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8915 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8916 LD->getExtensionType() == ISD::ZEXTLOAD))
8917 return true;
8918 }
8919
8920 return TargetLowering::isZExtFree(Val, VT2);
8921}
8922
8923 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
8924 EVT DstVT) const {
8925 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8926}
8927
8928 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
8929 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
8930}
8931
8933 // TODO: Support vectors.
8934 if (Y.getValueType().isVector())
8935 return false;
8936
8937 return !isa<ConstantSDNode>(Y);
8938}
8939
8940 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
8941 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
8942 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
8943}
8944
8945 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
8946 Type *Ty, bool IsSigned) const {
8947 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
8948 return true;
8949
8950 return IsSigned;
8951}
8952
8953 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8954 // Return false to suppress the unnecessary extensions if the LibCall
8955 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
8956 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
8957 Type.getSizeInBits() < Subtarget.getGRLen()))
8958 return false;
8959 return true;
8960}
8961
8962 // memcpy and other memory intrinsics typically try to use wider load/store
8963 // operations if the source/dest is aligned and the copy size is large enough.
8964 // We therefore want to align such objects passed to memory intrinsics.
8965 bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
8966 unsigned &MinSize,
8967 Align &PrefAlign) const {
8968 if (!isa<MemIntrinsic>(CI))
8969 return false;
8970
8971 if (Subtarget.is64Bit()) {
8972 MinSize = 8;
8973 PrefAlign = Align(8);
8974 } else {
8975 MinSize = 4;
8976 PrefAlign = Align(4);
8977 }
8978
8979 return true;
8980}
8981
8990
8991bool LoongArchTargetLowering::splitValueIntoRegisterParts(
8992 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
8993 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
8994 bool IsABIRegCopy = CC.has_value();
8995 EVT ValueVT = Val.getValueType();
8996
8997 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8998 PartVT == MVT::f32) {
8999 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9000 // nan, and cast to f32.
9001 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9002 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9003 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9004 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9005 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9006 Parts[0] = Val;
9007 return true;
9008 }
9009
9010 return false;
9011}
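// Worked example of the boxing above (sketch): IEEE half 1.0 is 0x3C00;
// OR-ing in 0xFFFF0000 gives 0xFFFF3C00, whose f32 exponent field is all ones
// with a non-zero mantissa, i.e. a NaN, so the f16 payload is carried
// unmodified in the low 16 bits of the f32 register.
static_assert((0x3C00u | 0xFFFF0000u) == 0xFFFF3C00u, "boxed bit pattern");
static_assert(((0xFFFF3C00u >> 23) & 0xFFu) == 0xFFu, "exponent field all ones");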
9012
9013SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9014 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9015 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9016 bool IsABIRegCopy = CC.has_value();
9017
9018 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9019 PartVT == MVT::f32) {
9020 SDValue Val = Parts[0];
9021
9022 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9023 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9024 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9025 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9026 return Val;
9027 }
9028
9029 return SDValue();
9030}
9031
9032MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9033 CallingConv::ID CC,
9034 EVT VT) const {
9035 // Use f32 to pass f16.
9036 if (VT == MVT::f16 && Subtarget.hasBasicF())
9037 return MVT::f32;
9038
9039 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
9040}
9041
9042unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9043 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9044 // Use f32 to pass f16.
9045 if (VT == MVT::f16 && Subtarget.hasBasicF())
9046 return 1;
9047
9048 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
9049}
9050
9051 bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
9052 SDValue Op, const APInt &OriginalDemandedBits,
9053 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9054 unsigned Depth) const {
9055 EVT VT = Op.getValueType();
9056 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9057 unsigned Opc = Op.getOpcode();
9058 switch (Opc) {
9059 default:
9060 break;
9061 case LoongArchISD::VMSKLTZ:
9062 case LoongArchISD::XVMSKLTZ: {
9063 SDValue Src = Op.getOperand(0);
9064 MVT SrcVT = Src.getSimpleValueType();
9065 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9066 unsigned NumElts = SrcVT.getVectorNumElements();
9067
9068 // If we don't need the sign bits at all just return zero.
9069 if (OriginalDemandedBits.countr_zero() >= NumElts)
9070 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9071
9072 // Only demand the vector elements of the sign bits we need.
9073 APInt KnownUndef, KnownZero;
9074 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9075 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9076 TLO, Depth + 1))
9077 return true;
9078
9079 Known.Zero = KnownZero.zext(BitWidth);
9080 Known.Zero.setHighBits(BitWidth - NumElts);
9081
9082 // [X]VMSKLTZ only uses the MSB from each vector element.
9083 KnownBits KnownSrc;
9084 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9085 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9086 Depth + 1))
9087 return true;
9088
9089 if (KnownSrc.One[SrcBits - 1])
9090 Known.One.setLowBits(NumElts);
9091 else if (KnownSrc.Zero[SrcBits - 1])
9092 Known.Zero.setLowBits(NumElts);
9093
9094 // Attempt to avoid multi-use ops if we don't need anything from it.
9095 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
9096 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9097 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9098 return false;
9099 }
9100 }
9101
9102 return TargetLowering::SimplifyDemandedBitsForTargetNode(
9103 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9104}
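// Illustration (sketch): the property used above. vmskltz.b gathers the sign
// bit of each of the 16 byte elements into the low 16 bits of the result, so
// only the MSB of every source element is demanded.
#include <cstdint>

static uint32_t vmskltzB(const int8_t (&V)[16]) {
  uint32_t Mask = 0;
  for (int I = 0; I < 16; ++I)
    Mask |= uint32_t((uint8_t(V[I]) >> 7) & 1u) << I; // element MSB -> bit I
  return Mask;
}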
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:388
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
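shouldExpandAtomicRMWInIR and emitMaskedAtomicRMWIntrinsic work as a pair: the first flags sub-word atomics for masked expansion, the second supplies the intrinsic that operates on the containing aligned word. A hedged sketch of the first hook (ExampleTLI and the size policy are illustrative, not the exact in-tree logic):
TargetLowering::AtomicExpansionKind
ExampleTLI::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // Sub-word atomicrmw has no native instruction in this sketch, so widen it
  // to a masked operation on the surrounding aligned 32-bit word.
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}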
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
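The MachineBasicBlock operations listed above usually appear together when a custom inserter expands a pseudo instruction into new control flow. A hedged sketch, where MI and BB are the MachineInstr and block handed to EmitInstrWithCustomInserter and SinkMBB is an illustrative name:
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVMBB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(LLVMBB);
MF->insert(It, SinkMBB);
// Everything after MI moves to the new block, which inherits BB's successors
// (and has the successors' PHIs rewritten accordingly).
SinkMBB->splice(SinkMBB->begin(), BB,
                std::next(MachineBasicBlock::iterator(MI)), BB->end());
SinkMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(SinkMBB);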
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
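A hedged sketch of how CreateFixedObject is typically combined with getFrameIndex when reserving part of the incoming-argument area (GRLenBytes, VaArgOffset and PtrVT are illustrative names):
MachineFrameInfo &MFI = MF.getFrameInfo();
// A fixed object lives at a known offset from the incoming stack pointer,
// e.g. the slot where an unnamed (vararg) register argument is spilled.
int FI = MFI.CreateFixedObject(GRLenBytes, VaArgOffset, /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);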
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
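A hedged two-line sketch of the usual way a register argument is read: make the physical register live in, then copy it into the DAG through the new virtual register (VA is a CCValAssign as in the earlier sketch):
Register VReg = MF.addLiveIn(VA.getLocReg(), &LoongArch::GPRRegClass);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());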
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
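A hedged sketch of how the addReg/addImm/addMBB operand helpers compose with BuildMI (listed further below); the opcode and register names are illustrative:
// dst = src + 1
BuildMI(*BB, MI, DL, TII->get(LoongArch::ADDI_W), DstReg)
    .addReg(SrcReg)
    .addImm(1);
// Conditional branch to TargetMBB.
BuildMI(*BB, MI, DL, TII->get(LoongArch::BEQ))
    .addReg(LHSReg)
    .addReg(RHSReg)
    .addMBB(TargetMBB);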
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
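A hedged sketch showing how getLoad yields both a value and a token chain when reading from a frame-index address (FI, PtrVT and GRLenVT are assumed from the surrounding lowering code):
SDValue Addr = DAG.getFrameIndex(FI, PtrVT);
SDValue Load = DAG.getLoad(GRLenVT, DL, Chain, Addr,
                           MachinePointerInfo::getFixedStack(MF, FI));
Chain = Load.getValue(1); // result 0 is the value, result 1 is the chain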
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
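A hedged sketch of node construction with getNode, getConstant and getSetCC: compute A + 1 and compare the sum against zero (VT is the integer type of A, CCVT is the target's setcc result type; both are assumed):
SDValue One = DAG.getConstant(1, DL, VT);
SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, A, One);
SDValue IsZero = DAG.getSetCC(DL, CCVT, Sum,
                              DAG.getConstant(0, DL, VT), ISD::SETEQ);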
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
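A hedged sketch of using MaskedValueIsZero to prove that the upper half of a 64-bit value is already zero before narrowing an operation:
APInt HighBits = APInt::getHighBitsSet(64, 32);
bool High32Zero = DAG.MaskedValueIsZero(Val, HighBits);
// High32Zero == true means Val already behaves like a zero-extended i32 here.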
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
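A hedged sketch of the SmallSet insert/count idiom, for example when remembering which argument registers have already been handed out:
SmallSet<MCPhysReg, 8> UsedRegs;
bool FirstUse = UsedRegs.insert(Reg).second; // false if Reg was already present
bool Known = UsedRegs.count(Reg);            // 1 if present, 0 otherwise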
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
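A hedged sketch of the StringRef helpers listed above, walking a comma-separated list (the feature string is illustrative):
StringRef Features = "+lsx,+lasx";
SmallVector<StringRef, 4> Enabled;
while (!Features.empty()) {
  StringRef Head;
  std::tie(Head, Features) = Features.split(',');
  if (Head.starts_with("+"))
    Enabled.push_back(Head.drop_front());
}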
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
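makeLibCall is the usual escape hatch when an operation has no native lowering. A hedged sketch for a float-to-int conversion routed through the runtime library (assumed to run inside a TargetLowering member, with Op, VT and DL coming from the caller):
RTLIB::Libcall LC =
    RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), VT);
MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Call =
    makeLibCall(DAG, LC, VT, Op.getOperand(0), CallOptions, DL);
SDValue Result = Call.first;    // the converted value
SDValue OutChain = Call.second; // the output chain (unused in this sketch)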
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1707
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1714
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
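The immediate-width helpers above, together with dyn_cast on ConstantSDNode listed earlier, gate most immediate-form foldings. A hedged sketch of the usual check for a 12-bit signed immediate operand, written as it would appear inside a DAG combine that returns SDValue() on failure:
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C || !isInt<12>(C->getSExtValue()))
  return SDValue(); // not a constant, or it does not fit a 12-bit immediate form
int64_t Imm = C->getSExtValue();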
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...