LoongArchISelLowering.cpp
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/IntrinsicsLoongArch.h"
32#include "llvm/Support/Debug.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "loongarch-isel-lowering"
41
42STATISTIC(NumTailCalls, "Number of tail calls");
43
44static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
45 cl::desc("Trap on integer division by zero."),
46 cl::init(false));
47
48LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
49 const LoongArchSubtarget &STI)
50 : TargetLowering(TM), Subtarget(STI) {
51
52 MVT GRLenVT = Subtarget.getGRLenVT();
53
54 // Set up the register classes.
55
56 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
57 if (Subtarget.hasBasicF())
58 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
59 if (Subtarget.hasBasicD())
60 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
61
62 static const MVT::SimpleValueType LSXVTs[] = {
63 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
64 static const MVT::SimpleValueType LASXVTs[] = {
65 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
66
67 if (Subtarget.hasExtLSX())
68 for (MVT VT : LSXVTs)
69 addRegisterClass(VT, &LoongArch::LSX128RegClass);
70
71 if (Subtarget.hasExtLASX())
72 for (MVT VT : LASXVTs)
73 addRegisterClass(VT, &LoongArch::LASX256RegClass);
74
75 // Set operations for LA32 and LA64.
76
78 MVT::i1, Promote);
79
86
89 GRLenVT, Custom);
90
92
93 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
94 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
95 setOperationAction(ISD::VASTART, MVT::Other, Custom);
96 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
97
98 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
99 setOperationAction(ISD::TRAP, MVT::Other, Legal);
100
104
105 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
106
107 // BITREV/REVB requires the 32S feature.
108 if (STI.has32S()) {
109 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
110 // we know whether sll or revb.2h is faster.
113
114 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
115 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
116 // and i32 could still be byte-swapped relatively cheaply.
118 } else {
126 }
127
128 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
129 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
130 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
134
137
138 // Set operations for LA64 only.
139
140 if (Subtarget.is64Bit()) {
147 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
158
162 Custom);
163 setOperationAction(ISD::LROUND, MVT::i32, Custom);
164 }
165
166 // Set operations for LA32 only.
167
168 if (!Subtarget.is64Bit()) {
174 if (Subtarget.hasBasicD())
175 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
176 }
177
178 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
179
180 static const ISD::CondCode FPCCToExpand[] = {
183
184 // Set operations for 'F' feature.
185
186 if (Subtarget.hasBasicF()) {
187 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
190 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
191 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
192
194 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
196 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
197 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
198 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
199 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
204 setOperationAction(ISD::FSIN, MVT::f32, Expand);
205 setOperationAction(ISD::FCOS, MVT::f32, Expand);
206 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
207 setOperationAction(ISD::FPOW, MVT::f32, Expand);
209 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
210 Subtarget.isSoftFPABI() ? LibCall : Custom);
211 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
212 Subtarget.isSoftFPABI() ? LibCall : Custom);
213 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
214 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
215 Subtarget.isSoftFPABI() ? LibCall : Custom);
216
217 if (Subtarget.is64Bit())
218 setOperationAction(ISD::FRINT, MVT::f32, Legal);
219
220 if (!Subtarget.hasBasicD()) {
222 if (Subtarget.is64Bit()) {
225 }
226 }
227 }
228
229 // Set operations for 'D' feature.
230
231 if (Subtarget.hasBasicD()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
235 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
236 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
237 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
238 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
239
241 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
245 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
246 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
247 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
249 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
251 setOperationAction(ISD::FSIN, MVT::f64, Expand);
252 setOperationAction(ISD::FCOS, MVT::f64, Expand);
253 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
254 setOperationAction(ISD::FPOW, MVT::f64, Expand);
256 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
257 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
258 Subtarget.isSoftFPABI() ? LibCall : Custom);
259 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
260 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
261 Subtarget.isSoftFPABI() ? LibCall : Custom);
262
263 if (Subtarget.is64Bit())
264 setOperationAction(ISD::FRINT, MVT::f64, Legal);
265 }
266
267 // Set operations for 'LSX' feature.
268
269 if (Subtarget.hasExtLSX()) {
271 // Expand all truncating stores and extending loads.
272 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
273 setTruncStoreAction(VT, InnerVT, Expand);
276 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
277 }
278 // By default everything must be expanded. Then we will selectively turn
279 // on ones that can be effectively codegen'd.
280 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
282 }
283
284 for (MVT VT : LSXVTs) {
285 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
286 setOperationAction(ISD::BITCAST, VT, Legal);
288
292
297 }
298 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
301 Legal);
303 VT, Legal);
310 Expand);
318 }
319 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
321 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
323 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
326 }
327 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
331 setOperationAction(ISD::FSQRT, VT, Legal);
332 setOperationAction(ISD::FNEG, VT, Legal);
335 VT, Expand);
337 }
339 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
340 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
341 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
342 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
343
344 for (MVT VT :
345 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
346 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
348 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
349 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
350 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
351 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
352 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
353 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
354 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
355 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
356 }
357 }
358
359 // Set operations for 'LASX' feature.
360
361 if (Subtarget.hasExtLASX()) {
362 for (MVT VT : LASXVTs) {
363 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
364 setOperationAction(ISD::BITCAST, VT, Legal);
366
372
376 }
377 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
380 Legal);
382 VT, Legal);
389 Expand);
397 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
398 }
399 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
401 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
403 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
406 }
407 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
411 setOperationAction(ISD::FSQRT, VT, Legal);
412 setOperationAction(ISD::FNEG, VT, Legal);
415 VT, Expand);
417 }
418 }
419
420 // Set DAG combine for LA32 and LA64.
421
426
427 // Set DAG combine for 'LSX' feature.
428
429 if (Subtarget.hasExtLSX()) {
431 setTargetDAGCombine(ISD::BITCAST);
432 }
433
434 // Set DAG combine for 'LASX' feature.
435
436 if (Subtarget.hasExtLASX())
438
439 // Compute derived properties from the register classes.
440 computeRegisterProperties(Subtarget.getRegisterInfo());
441
443
446
447 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
448
450
451 // Function alignments.
453 // Set preferred alignments.
454 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
455 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
456 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
457
458 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
459 if (Subtarget.hasLAMCAS())
461
462 if (Subtarget.hasSCQ()) {
464 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
465 }
466}
467
469 const GlobalAddressSDNode *GA) const {
470 // In order to maximise the opportunity for common subexpression elimination,
471 // keep a separate ADD node for the global address offset instead of folding
472 // it in the global address node. Later peephole optimisations may choose to
473 // fold it back in when profitable.
474 return false;
475}
476
478 SelectionDAG &DAG) const {
479 switch (Op.getOpcode()) {
480 case ISD::ATOMIC_FENCE:
481 return lowerATOMIC_FENCE(Op, DAG);
483 return lowerEH_DWARF_CFA(Op, DAG);
485 return lowerGlobalAddress(Op, DAG);
487 return lowerGlobalTLSAddress(Op, DAG);
489 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
491 return lowerINTRINSIC_W_CHAIN(Op, DAG);
493 return lowerINTRINSIC_VOID(Op, DAG);
495 return lowerBlockAddress(Op, DAG);
496 case ISD::JumpTable:
497 return lowerJumpTable(Op, DAG);
498 case ISD::SHL_PARTS:
499 return lowerShiftLeftParts(Op, DAG);
500 case ISD::SRA_PARTS:
501 return lowerShiftRightParts(Op, DAG, true);
502 case ISD::SRL_PARTS:
503 return lowerShiftRightParts(Op, DAG, false);
505 return lowerConstantPool(Op, DAG);
506 case ISD::FP_TO_SINT:
507 return lowerFP_TO_SINT(Op, DAG);
508 case ISD::BITCAST:
509 return lowerBITCAST(Op, DAG);
510 case ISD::UINT_TO_FP:
511 return lowerUINT_TO_FP(Op, DAG);
512 case ISD::SINT_TO_FP:
513 return lowerSINT_TO_FP(Op, DAG);
514 case ISD::VASTART:
515 return lowerVASTART(Op, DAG);
516 case ISD::FRAMEADDR:
517 return lowerFRAMEADDR(Op, DAG);
518 case ISD::RETURNADDR:
519 return lowerRETURNADDR(Op, DAG);
521 return lowerWRITE_REGISTER(Op, DAG);
523 return lowerINSERT_VECTOR_ELT(Op, DAG);
525 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
527 return lowerBUILD_VECTOR(Op, DAG);
529 return lowerCONCAT_VECTORS(Op, DAG);
531 return lowerVECTOR_SHUFFLE(Op, DAG);
532 case ISD::BITREVERSE:
533 return lowerBITREVERSE(Op, DAG);
535 return lowerSCALAR_TO_VECTOR(Op, DAG);
536 case ISD::PREFETCH:
537 return lowerPREFETCH(Op, DAG);
538 case ISD::SELECT:
539 return lowerSELECT(Op, DAG);
540 case ISD::BRCOND:
541 return lowerBRCOND(Op, DAG);
542 case ISD::FP_TO_FP16:
543 return lowerFP_TO_FP16(Op, DAG);
544 case ISD::FP16_TO_FP:
545 return lowerFP16_TO_FP(Op, DAG);
546 case ISD::FP_TO_BF16:
547 return lowerFP_TO_BF16(Op, DAG);
548 case ISD::BF16_TO_FP:
549 return lowerBF16_TO_FP(Op, DAG);
550 case ISD::VECREDUCE_ADD:
551 return lowerVECREDUCE_ADD(Op, DAG);
552 case ISD::VECREDUCE_AND:
553 case ISD::VECREDUCE_OR:
554 case ISD::VECREDUCE_XOR:
555 case ISD::VECREDUCE_SMAX:
556 case ISD::VECREDUCE_SMIN:
557 case ISD::VECREDUCE_UMAX:
558 case ISD::VECREDUCE_UMIN:
559 return lowerVECREDUCE(Op, DAG);
560 }
561 return SDValue();
562}
563
564// Lower vecreduce_add using vhaddw instructions.
565// For example:
566// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
567// can be lowered to:
568// VHADDW_D_W vr0, vr0, vr0
569// VHADDW_Q_D vr0, vr0, vr0
570// VPICKVE2GR_D a0, vr0, 0
571// ADDI_W a0, a0, 0
572SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
573 SelectionDAG &DAG) const {
574
575 SDLoc DL(Op);
576 MVT OpVT = Op.getSimpleValueType();
577 SDValue Val = Op.getOperand(0);
578
579 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
580 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
581
582 unsigned LegalVecSize = 128;
583 bool isLASX256Vector =
584 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
585
586 // Widen the operand vector until its type is legal.
587 while (!isTypeLegal(Val.getSimpleValueType())) {
588 Val = DAG.WidenVector(Val, DL);
589 }
590
591 // NumEles determines the iteration count; v4i32 for LSX and v8i32 for LASX
592 // should take the same number of iterations.
593 if (isLASX256Vector) {
594 NumEles /= 2;
595 LegalVecSize = 256;
596 }
597
598 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
599 MVT IntTy = MVT::getIntegerVT(EleBits);
600 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
601 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
602 }
603
604 if (isLASX256Vector) {
605 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
606 DAG.getConstant(2, DL, MVT::i64));
607 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
608 }
609
610 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
611 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
612}
613
614// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
615// For example:
616// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
617// can be lowered to:
618// VBSRL_V vr1, vr0, 8
619// VMAX_W vr0, vr1, vr0
620// VBSRL_V vr1, vr0, 4
621// VMAX_W vr0, vr1, vr0
622// VPICKVE2GR_W a0, vr0, 0
623// A 256-bit vector is illegal here and is by default split into two
624// 128-bit vectors, which are then processed by this routine.
625SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
626 SelectionDAG &DAG) const {
627 SDLoc DL(Op);
628
629 MVT OpVT = Op.getSimpleValueType();
630 SDValue Val = Op.getOperand(0);
631
632 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
633 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
634
635 // Widen the operand vector until its type is legal.
636 while (!isTypeLegal(Val.getSimpleValueType())) {
637 Val = DAG.WidenVector(Val, DL);
638 }
639
640 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
641 MVT VecTy = Val.getSimpleValueType();
642
643 for (int i = NumEles; i > 1; i /= 2) {
644 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
645 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
646 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
647 }
648
649 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
650 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
651}
652
653SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
654 SelectionDAG &DAG) const {
655 unsigned IsData = Op.getConstantOperandVal(4);
656
657 // We don't support non-data prefetch.
658 // Just preserve the chain.
659 if (!IsData)
660 return Op.getOperand(0);
661
662 return Op;
663}
664
665// Return true if Val is equal to (setcc LHS, RHS, CC).
666// Return false if Val is the inverse of (setcc LHS, RHS, CC).
667// Otherwise, return std::nullopt.
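// For example, with a, b and c standing for distinct values and
// Val = (setcc a, b, setlt):
//   matchSetCC(a, b, setlt, Val) --> true
//   matchSetCC(a, b, setge, Val) --> false (the inverse condition)
//   matchSetCC(a, c, setlt, Val) --> std::nullopt (operands do not match)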
668static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
669 ISD::CondCode CC, SDValue Val) {
670 assert(Val->getOpcode() == ISD::SETCC);
671 SDValue LHS2 = Val.getOperand(0);
672 SDValue RHS2 = Val.getOperand(1);
673 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
674
675 if (LHS == LHS2 && RHS == RHS2) {
676 if (CC == CC2)
677 return true;
678 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
679 return false;
680 } else if (LHS == RHS2 && RHS == LHS2) {
682 if (CC == CC2)
683 return true;
684 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
685 return false;
686 }
687
688 return std::nullopt;
689}
690
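// Fold (select cond, TrueV, FalseV) into plain bitwise arithmetic when one arm
// is the constant 0 or -1, when the two constant arms are bitwise complements
// of each other, or when the condition and both arms are all setcc nodes, so
// that no conditional-move sequence is needed.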
691static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
692 const LoongArchSubtarget &Subtarget) {
693 SDValue CondV = N->getOperand(0);
694 SDValue TrueV = N->getOperand(1);
695 SDValue FalseV = N->getOperand(2);
696 MVT VT = N->getSimpleValueType(0);
697 SDLoc DL(N);
698
699 // (select c, -1, y) -> -c | y
700 if (isAllOnesConstant(TrueV)) {
701 SDValue Neg = DAG.getNegative(CondV, DL, VT);
702 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
703 }
704 // (select c, y, -1) -> (c-1) | y
705 if (isAllOnesConstant(FalseV)) {
706 SDValue Neg =
707 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
708 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
709 }
710
711 // (select c, 0, y) -> (c-1) & y
712 if (isNullConstant(TrueV)) {
713 SDValue Neg =
714 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
715 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
716 }
717 // (select c, y, 0) -> -c & y
718 if (isNullConstant(FalseV)) {
719 SDValue Neg = DAG.getNegative(CondV, DL, VT);
720 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
721 }
722
723 // select c, ~x, x --> xor -c, x
724 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
725 const APInt &TrueVal = TrueV->getAsAPIntVal();
726 const APInt &FalseVal = FalseV->getAsAPIntVal();
727 if (~TrueVal == FalseVal) {
728 SDValue Neg = DAG.getNegative(CondV, DL, VT);
729 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
730 }
731 }
732
733 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
734 // when both truev and falsev are also setcc.
735 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
736 FalseV.getOpcode() == ISD::SETCC) {
737 SDValue LHS = CondV.getOperand(0);
738 SDValue RHS = CondV.getOperand(1);
739 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
740
741 // (select x, x, y) -> x | y
742 // (select !x, x, y) -> x & y
743 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
744 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
745 DAG.getFreeze(FalseV));
746 }
747 // (select x, y, x) -> x & y
748 // (select !x, y, x) -> x | y
749 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
750 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
751 DAG.getFreeze(TrueV), FalseV);
752 }
753 }
754
755 return SDValue();
756}
757
758// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
759// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
760// For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
761// being `0` or `-1`. In such cases we can replace `select` with `and`.
762// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
763// than `c0`?
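// A hypothetical example (constants chosen for illustration only):
//   (and (select cond, x, 0), 15)
// is rewritten to
//   (select cond, (and x, 15), 0)
// because and(0, 15) folds to 0, and a select with a zero arm can later be
// turned into a plain mask by combineSelectToBinOp.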
764static SDValue
765foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
766 const LoongArchSubtarget &Subtarget) {
767 unsigned SelOpNo = 0;
768 SDValue Sel = BO->getOperand(0);
769 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
770 SelOpNo = 1;
771 Sel = BO->getOperand(1);
772 }
773
774 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
775 return SDValue();
776
777 unsigned ConstSelOpNo = 1;
778 unsigned OtherSelOpNo = 2;
779 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
780 ConstSelOpNo = 2;
781 OtherSelOpNo = 1;
782 }
783 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
784 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
785 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
786 return SDValue();
787
788 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
789 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
790 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
791 return SDValue();
792
793 SDLoc DL(Sel);
794 EVT VT = BO->getValueType(0);
795
796 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
797 if (SelOpNo == 1)
798 std::swap(NewConstOps[0], NewConstOps[1]);
799
800 SDValue NewConstOp =
801 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
802 if (!NewConstOp)
803 return SDValue();
804
805 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
806 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
807 return SDValue();
808
809 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
810 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
811 if (SelOpNo == 1)
812 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
813 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
814
815 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
816 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
817 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
818}
819
820// Changes the condition code and swaps operands if necessary, so the SetCC
821// operation matches one of the comparisons supported directly by branches
822// in the LoongArch ISA. May adjust compares to favor compare with 0 over
823// compare with 1/-1.
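// For example (values are illustrative): (setcc X, 1, setlt) becomes
// (setcc 0, X, setge), and a single-bit test (setcc (and X, 1 << N), 0, seteq)
// whose mask does not fit a 12-bit immediate becomes a left shift that moves
// bit N to the MSB followed by (setcc X', 0, setge), avoiding the need to
// materialize the mask constant.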
824static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
825 ISD::CondCode &CC, SelectionDAG &DAG) {
826 // If this is a single bit test that can't be handled by ANDI, shift the
827 // bit to be tested to the MSB and perform a signed compare with 0.
828 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
829 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
830 isa<ConstantSDNode>(LHS.getOperand(1))) {
831 uint64_t Mask = LHS.getConstantOperandVal(1);
832 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
833 unsigned ShAmt = 0;
834 if (isPowerOf2_64(Mask)) {
835 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
836 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
837 } else {
838 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
839 }
840
841 LHS = LHS.getOperand(0);
842 if (ShAmt != 0)
843 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
844 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
845 return;
846 }
847 }
848
849 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
850 int64_t C = RHSC->getSExtValue();
851 switch (CC) {
852 default:
853 break;
854 case ISD::SETGT:
855 // Convert X > -1 to X >= 0.
856 if (C == -1) {
857 RHS = DAG.getConstant(0, DL, RHS.getValueType());
858 CC = ISD::SETGE;
859 return;
860 }
861 break;
862 case ISD::SETLT:
863 // Convert X < 1 to 0 >= X.
864 if (C == 1) {
865 RHS = LHS;
866 LHS = DAG.getConstant(0, DL, RHS.getValueType());
867 CC = ISD::SETGE;
868 return;
869 }
870 break;
871 }
872 }
873
874 switch (CC) {
875 default:
876 break;
877 case ISD::SETGT:
878 case ISD::SETLE:
879 case ISD::SETUGT:
880 case ISD::SETULE:
882 std::swap(LHS, RHS);
883 break;
884 }
885}
886
887SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
888 SelectionDAG &DAG) const {
889 SDValue CondV = Op.getOperand(0);
890 SDValue TrueV = Op.getOperand(1);
891 SDValue FalseV = Op.getOperand(2);
892 SDLoc DL(Op);
893 MVT VT = Op.getSimpleValueType();
894 MVT GRLenVT = Subtarget.getGRLenVT();
895
896 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
897 return V;
898
899 if (Op.hasOneUse()) {
900 unsigned UseOpc = Op->user_begin()->getOpcode();
901 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
902 SDNode *BinOp = *Op->user_begin();
903 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
904 DAG, Subtarget)) {
905 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
906 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
907 // may return a constant node and cause a crash in lowerSELECT.
908 if (NewSel.getOpcode() == ISD::SELECT)
909 return lowerSELECT(NewSel, DAG);
910 return NewSel;
911 }
912 }
913 }
914
915 // If the condition is not an integer SETCC which operates on GRLenVT, we need
916 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
917 // (select condv, truev, falsev)
918 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
919 if (CondV.getOpcode() != ISD::SETCC ||
920 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
921 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
922 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
923
924 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
925
926 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
927 }
928
929 // If the CondV is the output of a SETCC node which operates on GRLenVT
930 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
931 // to take advantage of the integer compare+branch instructions. i.e.: (select
932 // (setcc lhs, rhs, cc), truev, falsev)
933 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
934 SDValue LHS = CondV.getOperand(0);
935 SDValue RHS = CondV.getOperand(1);
936 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
937
938 // Special case for a select of 2 constants that have a difference of 1.
939 // Normally this is done by DAGCombine, but if the select is introduced by
940 // type legalization or op legalization, we miss it. Restricting to SETLT
941 // case for now because that is what signed saturating add/sub need.
942 // FIXME: We don't need the condition to be SETLT or even a SETCC,
943 // but we would probably want to swap the true/false values if the condition
944 // is SETGE/SETLE to avoid an XORI.
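  // For example (with illustrative constants): (select (setlt a, b), 4, 3)
  // becomes (add (setlt a, b), 3), and (select (setlt a, b), 3, 4) becomes
  // (sub 4, (setlt a, b)).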
945 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
946 CCVal == ISD::SETLT) {
947 const APInt &TrueVal = TrueV->getAsAPIntVal();
948 const APInt &FalseVal = FalseV->getAsAPIntVal();
949 if (TrueVal - 1 == FalseVal)
950 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
951 if (TrueVal + 1 == FalseVal)
952 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
953 }
954
955 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
956 // 1 < x ? x : 1 -> 0 < x ? x : 1
957 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
958 RHS == TrueV && LHS == FalseV) {
959 LHS = DAG.getConstant(0, DL, VT);
960 // 0 <u x is the same as x != 0.
961 if (CCVal == ISD::SETULT) {
962 std::swap(LHS, RHS);
963 CCVal = ISD::SETNE;
964 }
965 }
966
967 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
968 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
969 RHS == FalseV) {
970 RHS = DAG.getConstant(0, DL, VT);
971 }
972
973 SDValue TargetCC = DAG.getCondCode(CCVal);
974
975 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
976 // (select (setcc lhs, rhs, CC), constant, falsev)
977 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
978 std::swap(TrueV, FalseV);
979 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
980 }
981
982 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
983 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
984}
985
986SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
987 SelectionDAG &DAG) const {
988 SDValue CondV = Op.getOperand(1);
989 SDLoc DL(Op);
990 MVT GRLenVT = Subtarget.getGRLenVT();
991
992 if (CondV.getOpcode() == ISD::SETCC) {
993 if (CondV.getOperand(0).getValueType() == GRLenVT) {
994 SDValue LHS = CondV.getOperand(0);
995 SDValue RHS = CondV.getOperand(1);
996 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
997
998 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
999
1000 SDValue TargetCC = DAG.getCondCode(CCVal);
1001 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1002 Op.getOperand(0), LHS, RHS, TargetCC,
1003 Op.getOperand(2));
1004 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1005 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1006 Op.getOperand(0), CondV, Op.getOperand(2));
1007 }
1008 }
1009
1010 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1011 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1012 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1013}
1014
1015SDValue
1016LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1017 SelectionDAG &DAG) const {
1018 SDLoc DL(Op);
1019 MVT OpVT = Op.getSimpleValueType();
1020
1021 SDValue Vector = DAG.getUNDEF(OpVT);
1022 SDValue Val = Op.getOperand(0);
1023 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1024
1025 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1026}
1027
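// Lower vector BITREVERSE by bitcasting to <N x i64>, reversing the bits of
// each 64-bit element (BITREV_8B for byte-element vectors, since only the bits
// within each byte need reversing there), and then, for i16/i32 element types,
// shuffling the sub-elements of every 64-bit chunk back into their original
// order.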
1028SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1029 SelectionDAG &DAG) const {
1030 EVT ResTy = Op->getValueType(0);
1031 SDValue Src = Op->getOperand(0);
1032 SDLoc DL(Op);
1033
1034 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1035 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1036 unsigned int NewEltNum = NewVT.getVectorNumElements();
1037
1038 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1039
1041 for (unsigned int i = 0; i < NewEltNum; i++) {
1042 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1043 DAG.getConstant(i, DL, MVT::i64));
1044 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1045 ? (unsigned)LoongArchISD::BITREV_8B
1046 : (unsigned)ISD::BITREVERSE;
1047 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1048 }
1049 SDValue Res =
1050 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1051
1052 switch (ResTy.getSimpleVT().SimpleTy) {
1053 default:
1054 return SDValue();
1055 case MVT::v16i8:
1056 case MVT::v32i8:
1057 return Res;
1058 case MVT::v8i16:
1059 case MVT::v16i16:
1060 case MVT::v4i32:
1061 case MVT::v8i32: {
1063 for (unsigned int i = 0; i < NewEltNum; i++)
1064 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1065 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1066 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1067 }
1068 }
1069}
1070
1071// Widen element type to get a new mask value (if possible).
1072// For example:
1073// shufflevector <4 x i32> %a, <4 x i32> %b,
1074// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1075// is equivalent to:
1076// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1077// can be lowered to:
1078// VPACKOD_D vr0, vr0, vr1
1080 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1081 unsigned EltBits = VT.getScalarSizeInBits();
1082
1083 if (EltBits > 32 || EltBits == 1)
1084 return SDValue();
1085
1086 SmallVector<int, 8> NewMask;
1087 if (widenShuffleMaskElts(Mask, NewMask)) {
1088 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1089 : MVT::getIntegerVT(EltBits * 2);
1090 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1091 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1092 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1093 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1094 return DAG.getBitcast(
1095 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1096 }
1097 }
1098
1099 return SDValue();
1100}
1101
1102/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1103/// instructions.
1104// The function matches elements from one of the input vectors shuffled to the
1105// left or right with zeroable elements 'shifted in'. It handles both the
1106// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1107// lane.
1108// Mostly copied from X86.
1109static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1110 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1111 int MaskOffset, const APInt &Zeroable) {
1112 int Size = Mask.size();
1113 unsigned SizeInBits = Size * ScalarSizeInBits;
1114
1115 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1116 for (int i = 0; i < Size; i += Scale)
1117 for (int j = 0; j < Shift; ++j)
1118 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1119 return false;
1120
1121 return true;
1122 };
1123
1124 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1125 int Step = 1) {
1126 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1127 if (!(Mask[i] == -1 || Mask[i] == Low))
1128 return false;
1129 return true;
1130 };
1131
1132 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1133 for (int i = 0; i != Size; i += Scale) {
1134 unsigned Pos = Left ? i + Shift : i;
1135 unsigned Low = Left ? i : i + Shift;
1136 unsigned Len = Scale - Shift;
1137 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1138 return -1;
1139 }
1140
1141 int ShiftEltBits = ScalarSizeInBits * Scale;
1142 bool ByteShift = ShiftEltBits > 64;
1143 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1144 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1145 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1146
1147 // Normalize the scale for byte shifts to still produce an i64 element
1148 // type.
1149 Scale = ByteShift ? Scale / 2 : Scale;
1150
1151 // We need to round trip through the appropriate type for the shift.
1152 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1153 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1154 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1155 return (int)ShiftAmt;
1156 };
1157
1158 unsigned MaxWidth = 128;
1159 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1160 for (int Shift = 1; Shift != Scale; ++Shift)
1161 for (bool Left : {true, false})
1162 if (CheckZeros(Shift, Scale, Left)) {
1163 int ShiftAmt = MatchShift(Shift, Scale, Left);
1164 if (0 < ShiftAmt)
1165 return ShiftAmt;
1166 }
1167
1168 // no match
1169 return -1;
1170}
1171
1172/// Lower VECTOR_SHUFFLE as shift (if possible).
1173///
1174/// For example:
1175/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1176/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1177/// is lowered to:
1178/// (VBSLL_V $v0, $v0, 4)
1179///
1180/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1181/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1182/// is lowered to:
1183/// (VSLLI_D $v0, $v0, 32)
1185 MVT VT, SDValue V1, SDValue V2,
1186 SelectionDAG &DAG,
1187 const LoongArchSubtarget &Subtarget,
1188 const APInt &Zeroable) {
1189 int Size = Mask.size();
1190 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1191
1192 MVT ShiftVT;
1193 SDValue V = V1;
1194 unsigned Opcode;
1195
1196 // Try to match shuffle against V1 shift.
1197 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1198 Mask, 0, Zeroable);
1199
1200 // If V1 failed, try to match shuffle against V2 shift.
1201 if (ShiftAmt < 0) {
1202 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1203 Mask, Size, Zeroable);
1204 V = V2;
1205 }
1206
1207 if (ShiftAmt < 0)
1208 return SDValue();
1209
1210 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1211 "Illegal integer vector type");
1212 V = DAG.getBitcast(ShiftVT, V);
1213 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1214 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1215 return DAG.getBitcast(VT, V);
1216}
1217
1218/// Determine whether a range fits a regular pattern of values.
1219/// This function accounts for the possibility of jumping over the End iterator.
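/// For example, fitsRegularPattern(Begin, 2, End, 0, 2) succeeds when the
/// elements at positions 0, 2, 4, ... are each either undef (-1) or equal to
/// 0, 2, 4, ... respectively.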
1220template <typename ValType>
1221static bool
1223 unsigned CheckStride,
1225 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1226 auto &I = Begin;
1227
1228 while (I != End) {
1229 if (*I != -1 && *I != ExpectedIndex)
1230 return false;
1231 ExpectedIndex += ExpectedIndexStride;
1232
1233 // Incrementing past End is undefined behaviour so we must increment one
1234 // step at a time and check for End at each step.
1235 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1236 ; // Empty loop body.
1237 }
1238 return true;
1239}
1240
1241/// Compute whether each element of a shuffle is zeroable.
1242///
1243/// A "zeroable" vector shuffle element is one which can be lowered to zero.
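/// For example, in a shuffle of (%v, zeroinitializer) with mask <0, 5, 1, 6>,
/// elements 1 and 3 of the result take their values from the all-zeros operand
/// and are therefore zeroable.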
1244static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1245 SDValue V2, APInt &KnownUndef,
1246 APInt &KnownZero) {
1247 int Size = Mask.size();
1248 KnownUndef = KnownZero = APInt::getZero(Size);
1249
1250 V1 = peekThroughBitcasts(V1);
1251 V2 = peekThroughBitcasts(V2);
1252
1253 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1254 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1255
1256 int VectorSizeInBits = V1.getValueSizeInBits();
1257 int ScalarSizeInBits = VectorSizeInBits / Size;
1258 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1259 (void)ScalarSizeInBits;
1260
1261 for (int i = 0; i < Size; ++i) {
1262 int M = Mask[i];
1263 if (M < 0) {
1264 KnownUndef.setBit(i);
1265 continue;
1266 }
1267 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1268 KnownZero.setBit(i);
1269 continue;
1270 }
1271 }
1272}
1273
1274/// Test whether a shuffle mask is equivalent within each sub-lane.
1275///
1276/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1277/// non-trivial to compute in the face of undef lanes. The representation is
1278/// suitable for use with existing 128-bit shuffles as entries from the second
1279/// vector have been remapped to [LaneSize, 2*LaneSize).
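/// For example, the v8i32 mask <0, 9, 2, 11, 4, 13, 6, 15> repeats the mask
/// <0, 5, 2, 7> within each 128-bit lane.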
1280static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1281 ArrayRef<int> Mask,
1282 SmallVectorImpl<int> &RepeatedMask) {
1283 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1284 RepeatedMask.assign(LaneSize, -1);
1285 int Size = Mask.size();
1286 for (int i = 0; i < Size; ++i) {
1287 assert(Mask[i] == -1 || Mask[i] >= 0);
1288 if (Mask[i] < 0)
1289 continue;
1290 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1291 // This entry crosses lanes, so there is no way to model this shuffle.
1292 return false;
1293
1294 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1295 // Adjust second vector indices to start at LaneSize instead of Size.
1296 int LocalM =
1297 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1298 if (RepeatedMask[i % LaneSize] < 0)
1299 // This is the first non-undef entry in this slot of a 128-bit lane.
1300 RepeatedMask[i % LaneSize] = LocalM;
1301 else if (RepeatedMask[i % LaneSize] != LocalM)
1302 // Found a mismatch with the repeated mask.
1303 return false;
1304 }
1305 return true;
1306}
1307
1308/// Attempts to match vector shuffle as byte rotation.
1310 ArrayRef<int> Mask) {
1311
1312 SDValue Lo, Hi;
1313 SmallVector<int, 16> RepeatedMask;
1314
1315 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1316 return -1;
1317
1318 int NumElts = RepeatedMask.size();
1319 int Rotation = 0;
1320 int Scale = 16 / NumElts;
1321
1322 for (int i = 0; i < NumElts; ++i) {
1323 int M = RepeatedMask[i];
1324 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1325 "Unexpected mask index.");
1326 if (M < 0)
1327 continue;
1328
1329 // Determine where a rotated vector would have started.
1330 int StartIdx = i - (M % NumElts);
1331 if (StartIdx == 0)
1332 return -1;
1333
1334 // If we found the tail of a vector the rotation must be the missing
1335 // front. If we found the head of a vector, it must be how much of the
1336 // head.
1337 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1338
1339 if (Rotation == 0)
1340 Rotation = CandidateRotation;
1341 else if (Rotation != CandidateRotation)
1342 return -1;
1343
1344 // Compute which value this mask is pointing at.
1345 SDValue MaskV = M < NumElts ? V1 : V2;
1346
1347 // Compute which of the two target values this index should be assigned
1348 // to. This reflects whether the high elements are remaining or the low
1349 // elements are remaining.
1350 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1351
1352 // Either set up this value if we've not encountered it before, or check
1353 // that it remains consistent.
1354 if (!TargetV)
1355 TargetV = MaskV;
1356 else if (TargetV != MaskV)
1357 return -1;
1358 }
1359
1360 // Check that we successfully analyzed the mask, and normalize the results.
1361 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1362 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1363 if (!Lo)
1364 Lo = Hi;
1365 else if (!Hi)
1366 Hi = Lo;
1367
1368 V1 = Lo;
1369 V2 = Hi;
1370
1371 return Rotation * Scale;
1372}
1373
1374/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1375///
1376/// For example:
1377/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1378/// <2 x i32> <i32 3, i32 0>
1379/// is lowered to:
1380/// (VBSRL_V $v1, $v1, 8)
1381/// (VBSLL_V $v0, $v0, 8)
1382/// (VOR_V $v0, $V0, $v1)
1383static SDValue
1385 SDValue V1, SDValue V2, SelectionDAG &DAG,
1386 const LoongArchSubtarget &Subtarget) {
1387
1388 SDValue Lo = V1, Hi = V2;
1389 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1390 if (ByteRotation <= 0)
1391 return SDValue();
1392
1393 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1394 Lo = DAG.getBitcast(ByteVT, Lo);
1395 Hi = DAG.getBitcast(ByteVT, Hi);
1396
1397 int LoByteShift = 16 - ByteRotation;
1398 int HiByteShift = ByteRotation;
1399 MVT GRLenVT = Subtarget.getGRLenVT();
1400
1401 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1402 DAG.getConstant(LoByteShift, DL, GRLenVT));
1403 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1404 DAG.getConstant(HiByteShift, DL, GRLenVT));
1405 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1406}
1407
1408/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1409///
1410/// For example:
1411/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1412/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1413/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1414/// is lowered to:
1415/// (VREPLI $v1, 0)
1416/// (VILVL $v0, $v1, $v0)
1418 ArrayRef<int> Mask, MVT VT,
1419 SDValue V1, SDValue V2,
1420 SelectionDAG &DAG,
1421 const APInt &Zeroable) {
1422 int Bits = VT.getSizeInBits();
1423 int EltBits = VT.getScalarSizeInBits();
1424 int NumElements = VT.getVectorNumElements();
1425
1426 if (Zeroable.isAllOnes())
1427 return DAG.getConstant(0, DL, VT);
1428
1429 // Define a helper function to check a particular ext-scale and lower to it if
1430 // valid.
1431 auto Lower = [&](int Scale) -> SDValue {
1432 SDValue InputV;
1433 bool AnyExt = true;
1434 int Offset = 0;
1435 for (int i = 0; i < NumElements; i++) {
1436 int M = Mask[i];
1437 if (M < 0)
1438 continue;
1439 if (i % Scale != 0) {
1440 // Each of the extended elements needs to be zeroable.
1441 if (!Zeroable[i])
1442 return SDValue();
1443
1444 AnyExt = false;
1445 continue;
1446 }
1447
1448 // The base elements need to be consecutive indices into the same input
1449 // vector.
1450 SDValue V = M < NumElements ? V1 : V2;
1451 M = M % NumElements;
1452 if (!InputV) {
1453 InputV = V;
1454 Offset = M - (i / Scale);
1455
1456 // These offsets can't be handled.
1457 if (Offset % (NumElements / Scale))
1458 return SDValue();
1459 } else if (InputV != V)
1460 return SDValue();
1461
1462 if (M != (Offset + (i / Scale)))
1463 return SDValue(); // Non-consecutive strided elements.
1464 }
1465
1466 // If we fail to find an input, we have a zero-shuffle which should always
1467 // have already been handled.
1468 if (!InputV)
1469 return SDValue();
1470
1471 do {
1472 unsigned VilVLoHi = LoongArchISD::VILVL;
1473 if (Offset >= (NumElements / 2)) {
1474 VilVLoHi = LoongArchISD::VILVH;
1475 Offset -= (NumElements / 2);
1476 }
1477
1478 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1479 SDValue Ext =
1480 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1481 InputV = DAG.getBitcast(InputVT, InputV);
1482 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1483 Scale /= 2;
1484 EltBits *= 2;
1485 NumElements /= 2;
1486 } while (Scale > 1);
1487 return DAG.getBitcast(VT, InputV);
1488 };
1489
1490 // Each iteration, try extending the elements half as much, but into twice as
1491 // many elements.
1492 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1493 NumExtElements *= 2) {
1494 if (SDValue V = Lower(NumElements / NumExtElements))
1495 return V;
1496 }
1497 return SDValue();
1498}
1499
1500/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1501///
1502/// VREPLVEI performs vector broadcast based on an element specified by an
1503/// integer immediate, with its mask being similar to:
1504/// <x, x, x, ...>
1505/// where x is any valid index.
1506///
1507/// When undef's appear in the mask they are treated as if they were whatever
1508/// value is necessary in order to fit the above form.
1509static SDValue
1511 SDValue V1, SDValue V2, SelectionDAG &DAG,
1512 const LoongArchSubtarget &Subtarget) {
1513 int SplatIndex = -1;
1514 for (const auto &M : Mask) {
1515 if (M != -1) {
1516 SplatIndex = M;
1517 break;
1518 }
1519 }
1520
1521 if (SplatIndex == -1)
1522 return DAG.getUNDEF(VT);
1523
1524 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1525 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1526 APInt Imm(64, SplatIndex);
1527 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1528 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1529 }
1530
1531 return SDValue();
1532}
1533
1534/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1535///
1536/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1537/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1538///
1539/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1540/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1541/// When undef's appear they are treated as if they were whatever value is
1542/// necessary in order to fit the above forms.
1543///
1544/// For example:
1545/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1546/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1547/// i32 7, i32 6, i32 5, i32 4>
1548/// is lowered to:
1549/// (VSHUF4I_H $v0, $v1, 27)
1550/// where the 27 comes from:
1551/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
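/// Each 2-bit field of the immediate selects the source element for the
/// corresponding position within every 4-element block, with the lowest field
/// describing element 0.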
1552static SDValue
1554 SDValue V1, SDValue V2, SelectionDAG &DAG,
1555 const LoongArchSubtarget &Subtarget) {
1556
1557 unsigned SubVecSize = 4;
1558 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1559 SubVecSize = 2;
1560
1561 int SubMask[4] = {-1, -1, -1, -1};
1562 for (unsigned i = 0; i < SubVecSize; ++i) {
1563 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1564 int M = Mask[j];
1565
1566 // Convert from vector index to 4-element subvector index
1567 // If an index refers to an element outside of the subvector then give up
1568 if (M != -1) {
1569 M -= 4 * (j / SubVecSize);
1570 if (M < 0 || M >= 4)
1571 return SDValue();
1572 }
1573
1574 // If the mask has an undef, replace it with the current index.
1575 // Note that it might still be undef if the current index is also undef
1576 if (SubMask[i] == -1)
1577 SubMask[i] = M;
1578 // Check that non-undef values are the same as in the mask. If they
1579 // aren't then give up
1580 else if (M != -1 && M != SubMask[i])
1581 return SDValue();
1582 }
1583 }
1584
1585 // Calculate the immediate. Replace any remaining undefs with zero
1586 APInt Imm(64, 0);
1587 for (int i = SubVecSize - 1; i >= 0; --i) {
1588 int M = SubMask[i];
1589
1590 if (M == -1)
1591 M = 0;
1592
1593 Imm <<= 2;
1594 Imm |= M & 0x3;
1595 }
1596
1597 MVT GRLenVT = Subtarget.getGRLenVT();
1598
1599 // Return vshuf4i.d
1600 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1601 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1602 DAG.getConstant(Imm, DL, GRLenVT));
1603
1604 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1605 DAG.getConstant(Imm, DL, GRLenVT));
1606}
1607
1608/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1609///
1610/// VPACKEV interleaves the even elements from each vector.
1611///
1612/// It is possible to lower into VPACKEV when the mask consists of two of the
1613/// following forms interleaved:
1614/// <0, 2, 4, ...>
1615/// <n, n+2, n+4, ...>
1616/// where n is the number of elements in the vector.
1617/// For example:
1618/// <0, 0, 2, 2, 4, 4, ...>
1619/// <0, n, 2, n+2, 4, n+4, ...>
1620///
1621/// When undef's appear in the mask they are treated as if they were whatever
1622/// value is necessary in order to fit the above forms.
1624 MVT VT, SDValue V1, SDValue V2,
1625 SelectionDAG &DAG) {
1626
1627 const auto &Begin = Mask.begin();
1628 const auto &End = Mask.end();
1629 SDValue OriV1 = V1, OriV2 = V2;
1630
1631 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1632 V1 = OriV1;
1633 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1634 V1 = OriV2;
1635 else
1636 return SDValue();
1637
1638 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1639 V2 = OriV1;
1640 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1641 V2 = OriV2;
1642 else
1643 return SDValue();
1644
1645 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1646}
1647
1648/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1649///
1650/// VPACKOD interleaves the odd elements from each vector.
1651///
1652/// It is possible to lower into VPACKOD when the mask consists of two of the
1653/// following forms interleaved:
1654/// <1, 3, 5, ...>
1655/// <n+1, n+3, n+5, ...>
1656/// where n is the number of elements in the vector.
1657/// For example:
1658/// <1, 1, 3, 3, 5, 5, ...>
1659/// <1, n+1, 3, n+3, 5, n+5, ...>
1660///
1661/// When undef's appear in the mask they are treated as if they were whatever
1662/// value is necessary in order to fit the above forms.
1664 MVT VT, SDValue V1, SDValue V2,
1665 SelectionDAG &DAG) {
1666
1667 const auto &Begin = Mask.begin();
1668 const auto &End = Mask.end();
1669 SDValue OriV1 = V1, OriV2 = V2;
1670
1671 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1672 V1 = OriV1;
1673 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1674 V1 = OriV2;
1675 else
1676 return SDValue();
1677
1678 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1679 V2 = OriV1;
1680 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1681 V2 = OriV2;
1682 else
1683 return SDValue();
1684
1685 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1686}
1687
1688/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1689///
1690/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1691/// of each vector.
1692///
1693/// It is possible to lower into VILVH when the mask consists of two of the
1694/// following forms interleaved:
1695/// <x, x+1, x+2, ...>
1696/// <n+x, n+x+1, n+x+2, ...>
1697/// where n is the number of elements in the vector and x is half n.
1698/// For example:
1699/// <x, x, x+1, x+1, x+2, x+2, ...>
1700/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1701///
1702/// When undef's appear in the mask they are treated as if they were whatever
1703/// value is necessary in order to fit the above forms.
1705 MVT VT, SDValue V1, SDValue V2,
1706 SelectionDAG &DAG) {
1707
1708 const auto &Begin = Mask.begin();
1709 const auto &End = Mask.end();
1710 unsigned HalfSize = Mask.size() / 2;
1711 SDValue OriV1 = V1, OriV2 = V2;
1712
1713 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1714 V1 = OriV1;
1715 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1716 V1 = OriV2;
1717 else
1718 return SDValue();
1719
1720 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1721 V2 = OriV1;
1722 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1723 1))
1724 V2 = OriV2;
1725 else
1726 return SDValue();
1727
1728 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1729}
1730
1731/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1732///
1733/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1734/// of each vector.
1735///
1736/// It is possible to lower into VILVL when the mask consists of two of the
1737/// following forms interleaved:
1738/// <0, 1, 2, ...>
1739/// <n, n+1, n+2, ...>
1740/// where n is the number of elements in the vector.
1741/// For example:
1742/// <0, 0, 1, 1, 2, 2, ...>
1743/// <0, n, 1, n+1, 2, n+2, ...>
1744///
1745/// When undef's appear in the mask they are treated as if they were whatever
1746/// value is necessary in order to fit the above forms.
1748 MVT VT, SDValue V1, SDValue V2,
1749 SelectionDAG &DAG) {
1750
1751 const auto &Begin = Mask.begin();
1752 const auto &End = Mask.end();
1753 SDValue OriV1 = V1, OriV2 = V2;
1754
1755 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1756 V1 = OriV1;
1757 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1758 V1 = OriV2;
1759 else
1760 return SDValue();
1761
1762 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1763 V2 = OriV1;
1764 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1765 V2 = OriV2;
1766 else
1767 return SDValue();
1768
1769 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1770}
1771
1772/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1773///
1774/// VPICKEV copies the even elements of each vector into the result vector.
1775///
1776/// It is possible to lower into VPICKEV when the mask consists of two of the
1777/// following forms concatenated:
1778/// <0, 2, 4, ...>
1779/// <n, n+2, n+4, ...>
1780/// where n is the number of elements in the vector.
1781/// For example:
1782/// <0, 2, 4, ..., 0, 2, 4, ...>
1783/// <0, 2, 4, ..., n, n+2, n+4, ...>
1784///
1785/// When undef's appear in the mask they are treated as if they were whatever
1786/// value is necessary in order to fit the above forms.
1788 MVT VT, SDValue V1, SDValue V2,
1789 SelectionDAG &DAG) {
1790
1791 const auto &Begin = Mask.begin();
1792 const auto &Mid = Mask.begin() + Mask.size() / 2;
1793 const auto &End = Mask.end();
1794 SDValue OriV1 = V1, OriV2 = V2;
1795
1796 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1797 V1 = OriV1;
1798 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1799 V1 = OriV2;
1800 else
1801 return SDValue();
1802
1803 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1804 V2 = OriV1;
1805 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1806 V2 = OriV2;
1807
1808 else
1809 return SDValue();
1810
1811 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1812}
1813
1814/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1815///
1816/// VPICKOD copies the odd elements of each vector into the result vector.
1817///
1818/// It is possible to lower into VPICKOD when the mask consists of two of the
1819/// following forms concatenated:
1820/// <1, 3, 5, ...>
1821/// <n+1, n+3, n+5, ...>
1822/// where n is the number of elements in the vector.
1823/// For example:
1824/// <1, 3, 5, ..., 1, 3, 5, ...>
1825/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1826///
1827/// When undef's appear in the mask they are treated as if they were whatever
1828/// value is necessary in order to fit the above forms.
1830 MVT VT, SDValue V1, SDValue V2,
1831 SelectionDAG &DAG) {
1832
1833 const auto &Begin = Mask.begin();
1834 const auto &Mid = Mask.begin() + Mask.size() / 2;
1835 const auto &End = Mask.end();
1836 SDValue OriV1 = V1, OriV2 = V2;
1837
1838 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1839 V1 = OriV1;
1840 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1841 V1 = OriV2;
1842 else
1843 return SDValue();
1844
1845 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1846 V2 = OriV1;
1847 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1848 V2 = OriV2;
1849 else
1850 return SDValue();
1851
1852 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1853}
1854
1855/// Lower VECTOR_SHUFFLE into VSHUF.
1856///
1857/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1858/// adding it as an operand to the resulting VSHUF.
1860 MVT VT, SDValue V1, SDValue V2,
1861 SelectionDAG &DAG) {
1862
1864 for (auto M : Mask)
1865 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1866
1867 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1868 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1869
1870 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1871 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1872 // VSHF concatenates the vectors in a bitwise fashion:
1873 // <0b00, 0b01> + <0b10, 0b11> ->
1874 // 0b0100 + 0b1110 -> 0b01001110
1875 // <0b10, 0b11, 0b00, 0b01>
1876 // We must therefore swap the operands to get the correct result.
1877 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1878}
1879
1880/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1881///
1882/// This routine breaks down the specific type of 128-bit shuffle and
1883/// dispatches to the lowering routines accordingly.
1884 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1885 SDValue V1, SDValue V2, SelectionDAG &DAG,
1886 const LoongArchSubtarget &Subtarget) {
1887 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1888 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1889 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1890 "Vector type is unsupported for lsx!");
1892 "Two operands have different types!");
1893 assert(VT.getVectorNumElements() == Mask.size() &&
1894 "Unexpected mask size for shuffle!");
1895 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1896
1897 APInt KnownUndef, KnownZero;
1898 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1899 APInt Zeroable = KnownUndef | KnownZero;
1900
1901 SDValue Result;
1902 // TODO: Add more comparison patterns.
1903 if (V2.isUndef()) {
1904 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
1905 Subtarget)))
1906 return Result;
1907 if ((Result =
1908 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1909 return Result;
1910
1911 // TODO: The commented-out assignment below may be enabled in the future to
1912 // better match the patterns for instruction selection.
1913 /* V2 = V1; */
1914 }
1915
1916 // It is recommended not to change the pattern comparison order for better
1917 // performance.
1918 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1919 return Result;
1920 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1921 return Result;
1922 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1923 return Result;
1924 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1925 return Result;
1926 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1927 return Result;
1928 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1929 return Result;
1930 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1931 (Result =
1932 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1933 return Result;
1934 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1935 Zeroable)))
1936 return Result;
1937 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
1938 Zeroable)))
1939 return Result;
1940 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
1941 Subtarget)))
1942 return Result;
1943 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1944 return NewShuffle;
1945 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1946 return Result;
1947 return SDValue();
1948}
1949
1950/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1951///
1952 /// It is an XVREPLVEI when the mask is:
1953 /// <x, x, x, ..., x+n, x+n, x+n, ...>
1954 /// where the number of x's equals n and n is half the length of the vector.
1955///
1956/// When undef's appear in the mask they are treated as if they were whatever
1957/// value is necessary in order to fit the above form.
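/// For instance, for a v8i32 shuffle (n = 4) the mask <1, 1, 1, 1, 5, 5, 5, 5>
/// fits this form and is selected as a VREPLVEI node with immediate 1,
/// replicating element 1 within each 128-bit half of the source.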
1958static SDValue
1959 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1960 SDValue V1, SDValue V2, SelectionDAG &DAG,
1961 const LoongArchSubtarget &Subtarget) {
1962 int SplatIndex = -1;
1963 for (const auto &M : Mask) {
1964 if (M != -1) {
1965 SplatIndex = M;
1966 break;
1967 }
1968 }
1969
1970 if (SplatIndex == -1)
1971 return DAG.getUNDEF(VT);
1972
1973 const auto &Begin = Mask.begin();
1974 const auto &End = Mask.end();
1975 unsigned HalfSize = Mask.size() / 2;
1976
1977 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1978 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1979 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1980 0)) {
1981 APInt Imm(64, SplatIndex);
1982 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1983 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1984 }
1985
1986 return SDValue();
1987}
1988
1989/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1990static SDValue
1991 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1992 SDValue V1, SDValue V2, SelectionDAG &DAG,
1993 const LoongArchSubtarget &Subtarget) {
1994 // When the size is less than or equal to 4, lower cost instructions may be
1995 // used.
1996 if (Mask.size() <= 4)
1997 return SDValue();
1998 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
1999}
2000
2001/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
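///
/// xvperm.w selects each of the eight 32-bit result elements from an arbitrary
/// element of the single source vector according to a per-element index, so it
/// can handle cross-lane single-source shuffles (e.g. <0, 7, 2, 5, 1, 6, 3, 4>)
/// that the lane-local patterns cannot. It is only used when at least one
/// 128-bit half of the mask mixes elements from both halves of the source; see
/// the latency note below.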
2002 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2003 MVT VT, SDValue V1, SDValue V2,
2004 SelectionDAG &DAG) {
2005 // LoongArch LASX only has XVPERM_W.
2006 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2007 return SDValue();
2008
2009 unsigned NumElts = VT.getVectorNumElements();
2010 unsigned HalfSize = NumElts / 2;
2011 bool FrontLo = true, FrontHi = true;
2012 bool BackLo = true, BackHi = true;
2013
2014 auto inRange = [](int val, int low, int high) {
2015 return (val == -1) || (val >= low && val < high);
2016 };
2017
2018 for (unsigned i = 0; i < HalfSize; ++i) {
2019 int Fronti = Mask[i];
2020 int Backi = Mask[i + HalfSize];
2021
2022 FrontLo &= inRange(Fronti, 0, HalfSize);
2023 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2024 BackLo &= inRange(Backi, 0, HalfSize);
2025 BackHi &= inRange(Backi, HalfSize, NumElts);
2026 }
2027
2028 // If both the lower and upper 128-bit halves of the result each access only
2029 // one half of the source vector (either lower or upper), avoid xvperm.w: its
2030 // latency (3) is higher than that of an xvshuf (1) plus an xvori (1).
2031 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2032 return SDValue();
2033
2034 SmallVector<SDValue, 8> Masks;
2035 for (unsigned i = 0; i < NumElts; ++i)
2036 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
2037 : DAG.getConstant(Mask[i], DL, MVT::i64));
2038 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2039
2040 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2041}
2042
2043/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2044 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2045 MVT VT, SDValue V1, SDValue V2,
2046 SelectionDAG &DAG) {
2047 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2048}
2049
2050/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2051 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2052 MVT VT, SDValue V1, SDValue V2,
2053 SelectionDAG &DAG) {
2054 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2055}
2056
2057/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2058 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2059 MVT VT, SDValue V1, SDValue V2,
2060 SelectionDAG &DAG) {
2061
2062 const auto &Begin = Mask.begin();
2063 const auto &End = Mask.end();
2064 unsigned HalfSize = Mask.size() / 2;
2065 unsigned LeftSize = HalfSize / 2;
2066 SDValue OriV1 = V1, OriV2 = V2;
2067
2068 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2069 1) &&
2070 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2071 V1 = OriV1;
2072 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2073 Mask.size() + HalfSize - LeftSize, 1) &&
2074 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2075 Mask.size() + HalfSize + LeftSize, 1))
2076 V1 = OriV2;
2077 else
2078 return SDValue();
2079
2080 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2081 1) &&
2082 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2083 1))
2084 V2 = OriV1;
2085 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2086 Mask.size() + HalfSize - LeftSize, 1) &&
2087 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2088 Mask.size() + HalfSize + LeftSize, 1))
2089 V2 = OriV2;
2090 else
2091 return SDValue();
2092
2093 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2094}
2095
2096/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2097 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2098 MVT VT, SDValue V1, SDValue V2,
2099 SelectionDAG &DAG) {
2100
2101 const auto &Begin = Mask.begin();
2102 const auto &End = Mask.end();
2103 unsigned HalfSize = Mask.size() / 2;
2104 SDValue OriV1 = V1, OriV2 = V2;
2105
2106 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2107 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2108 V1 = OriV1;
2109 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2110 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2111 Mask.size() + HalfSize, 1))
2112 V1 = OriV2;
2113 else
2114 return SDValue();
2115
2116 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2117 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2118 V2 = OriV1;
2119 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2120 1) &&
2121 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2122 Mask.size() + HalfSize, 1))
2123 V2 = OriV2;
2124 else
2125 return SDValue();
2126
2127 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2128}
2129
2130/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2131 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2132 MVT VT, SDValue V1, SDValue V2,
2133 SelectionDAG &DAG) {
2134
2135 const auto &Begin = Mask.begin();
2136 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2137 const auto &Mid = Mask.begin() + Mask.size() / 2;
2138 const auto &RightMid = Mask.end() - Mask.size() / 4;
2139 const auto &End = Mask.end();
2140 unsigned HalfSize = Mask.size() / 2;
2141 SDValue OriV1 = V1, OriV2 = V2;
2142
2143 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2144 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2145 V1 = OriV1;
2146 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2147 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2148 V1 = OriV2;
2149 else
2150 return SDValue();
2151
2152 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2153 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2154 V2 = OriV1;
2155 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2156 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2157 V2 = OriV2;
2158
2159 else
2160 return SDValue();
2161
2162 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2163}
2164
2165/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2166 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2167 MVT VT, SDValue V1, SDValue V2,
2168 SelectionDAG &DAG) {
2169
2170 const auto &Begin = Mask.begin();
2171 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2172 const auto &Mid = Mask.begin() + Mask.size() / 2;
2173 const auto &RightMid = Mask.end() - Mask.size() / 4;
2174 const auto &End = Mask.end();
2175 unsigned HalfSize = Mask.size() / 2;
2176 SDValue OriV1 = V1, OriV2 = V2;
2177
2178 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2179 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2180 V1 = OriV1;
2181 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2182 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2183 2))
2184 V1 = OriV2;
2185 else
2186 return SDValue();
2187
2188 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2189 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2190 V2 = OriV1;
2191 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2192 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2193 2))
2194 V2 = OriV2;
2195 else
2196 return SDValue();
2197
2198 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2199}
2200
2201/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2202 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2203 MVT VT, SDValue V1, SDValue V2,
2204 SelectionDAG &DAG) {
2205
2206 int MaskSize = Mask.size();
2207 int HalfSize = Mask.size() / 2;
2208 const auto &Begin = Mask.begin();
2209 const auto &Mid = Mask.begin() + HalfSize;
2210 const auto &End = Mask.end();
2211
2212 // VECTOR_SHUFFLE concatenates the vectors:
2213 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2214 // shuffling ->
2215 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2216 //
2217 // XVSHUF concatenates the vectors:
2218 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2219 // shuffling ->
2220 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2221 SmallVector<SDValue, 8> MaskAlloc;
2222 for (auto it = Begin; it < Mid; it++) {
2223 if (*it < 0) // UNDEF
2224 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2225 else if ((*it >= 0 && *it < HalfSize) ||
2226 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2227 int M = *it < HalfSize ? *it : *it - HalfSize;
2228 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2229 } else
2230 return SDValue();
2231 }
2232 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2233
2234 for (auto it = Mid; it < End; it++) {
2235 if (*it < 0) // UNDEF
2236 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2237 else if ((*it >= HalfSize && *it < MaskSize) ||
2238 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2239 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2240 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2241 } else
2242 return SDValue();
2243 }
2244 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2245
2246 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2247 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2248 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2249}
2250
2251/// Shuffle vectors by lane to generate more optimized instructions.
2252/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2253///
2254/// Therefore, except for the following four cases, other cases are regarded
2255/// as cross-lane shuffles, where optimization is relatively limited.
2256///
2257 /// - Shuffle high, low lanes of the two input vectors
2258 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2259 /// - Shuffle low, high lanes of the two input vectors
2260 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2261 /// - Shuffle low, low lanes of the two input vectors
2262 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2263 /// - Shuffle high, high lanes of the two input vectors
2264 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2265///
2266/// The first case is the closest to LoongArch instructions and the other
2267/// cases need to be converted to it for processing.
2268///
2269 /// This function may modify V1, V2, and Mask.
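///
/// The conversion below is done with XVPERMI nodes (xvpermi.d) on the v4i64
/// view of each operand; the 8-bit immediate is interpreted as four 2-bit
/// source-element selectors, so 0b01001110 swaps the two 128-bit halves,
/// 0b11101110 copies the high half into both halves, and 0b01000100 copies the
/// low half into both halves. The mask indices are then rebased to match the
/// permuted operands.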
2270 static void canonicalizeShuffleVectorByLane(
2271 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2272 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2273
2274 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2275
2276 int MaskSize = Mask.size();
2277 int HalfSize = Mask.size() / 2;
2278 MVT GRLenVT = Subtarget.getGRLenVT();
2279
2280 HalfMaskType preMask = None, postMask = None;
2281
2282 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2283 return M < 0 || (M >= 0 && M < HalfSize) ||
2284 (M >= MaskSize && M < MaskSize + HalfSize);
2285 }))
2286 preMask = HighLaneTy;
2287 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2288 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2289 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2290 }))
2291 preMask = LowLaneTy;
2292
2293 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2294 return M < 0 || (M >= 0 && M < HalfSize) ||
2295 (M >= MaskSize && M < MaskSize + HalfSize);
2296 }))
2297 postMask = HighLaneTy;
2298 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2299 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2300 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2301 }))
2302 postMask = LowLaneTy;
2303
2304 // The pre-half of mask is high lane type, and the post-half of mask
2305 // is low lane type, which is closest to the LoongArch instructions.
2306 //
2307 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2308 // to the lower 128 bits of the vector register, and the low lane of the mask
2309 // corresponds to the higher 128 bits of the vector register.
2310 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2311 return;
2312 }
2313 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2314 V1 = DAG.getBitcast(MVT::v4i64, V1);
2315 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2316 DAG.getConstant(0b01001110, DL, GRLenVT));
2317 V1 = DAG.getBitcast(VT, V1);
2318
2319 if (!V2.isUndef()) {
2320 V2 = DAG.getBitcast(MVT::v4i64, V2);
2321 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2322 DAG.getConstant(0b01001110, DL, GRLenVT));
2323 V2 = DAG.getBitcast(VT, V2);
2324 }
2325
2326 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2327 *it = *it < 0 ? *it : *it - HalfSize;
2328 }
2329 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2330 *it = *it < 0 ? *it : *it + HalfSize;
2331 }
2332 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2333 V1 = DAG.getBitcast(MVT::v4i64, V1);
2334 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2335 DAG.getConstant(0b11101110, DL, GRLenVT));
2336 V1 = DAG.getBitcast(VT, V1);
2337
2338 if (!V2.isUndef()) {
2339 V2 = DAG.getBitcast(MVT::v4i64, V2);
2340 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2341 DAG.getConstant(0b11101110, DL, GRLenVT));
2342 V2 = DAG.getBitcast(VT, V2);
2343 }
2344
2345 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2346 *it = *it < 0 ? *it : *it - HalfSize;
2347 }
2348 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2349 V1 = DAG.getBitcast(MVT::v4i64, V1);
2350 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2351 DAG.getConstant(0b01000100, DL, GRLenVT));
2352 V1 = DAG.getBitcast(VT, V1);
2353
2354 if (!V2.isUndef()) {
2355 V2 = DAG.getBitcast(MVT::v4i64, V2);
2356 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2357 DAG.getConstant(0b01000100, DL, GRLenVT));
2358 V2 = DAG.getBitcast(VT, V2);
2359 }
2360
2361 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2362 *it = *it < 0 ? *it : *it + HalfSize;
2363 }
2364 } else { // cross-lane
2365 return;
2366 }
2367}
2368
2369/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2370/// Only for 256-bit vector.
2371///
2372/// For example:
2373 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2374 /// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2375 /// is lowered to:
2376/// (XVPERMI $xr2, $xr0, 78)
2377/// (XVSHUF $xr1, $xr2, $xr0)
2378/// (XVORI $xr0, $xr1, 0)
2379 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2380 ArrayRef<int> Mask,
2381 MVT VT, SDValue V1,
2382 SDValue V2,
2383 SelectionDAG &DAG) {
2384 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2385 int Size = Mask.size();
2386 int LaneSize = Size / 2;
2387
2388 bool LaneCrossing[2] = {false, false};
2389 for (int i = 0; i < Size; ++i)
2390 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2391 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2392
2393 // Bail out if no element actually crosses a lane.
2394 if (!LaneCrossing[0] && !LaneCrossing[1])
2395 return SDValue();
2396
2397 SmallVector<int> InLaneMask;
2398 InLaneMask.assign(Mask.begin(), Mask.end());
2399 for (int i = 0; i < Size; ++i) {
2400 int &M = InLaneMask[i];
2401 if (M < 0)
2402 continue;
2403 if (((M % Size) / LaneSize) != (i / LaneSize))
2404 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2405 }
2406
2407 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2408 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2409 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2410 Flipped = DAG.getBitcast(VT, Flipped);
2411 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2412}
2413
2414/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2415///
2416/// This routine breaks down the specific type of 256-bit shuffle and
2417/// dispatches to the lowering routines accordingly.
2418 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2419 SDValue V1, SDValue V2, SelectionDAG &DAG,
2420 const LoongArchSubtarget &Subtarget) {
2421 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2422 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2423 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2424 "Vector type is unsupported for lasx!");
2426 "Two operands have different types!");
2427 assert(VT.getVectorNumElements() == Mask.size() &&
2428 "Unexpected mask size for shuffle!");
2429 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2430 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2431
2432 // Canonicalize non-cross-lane shuffle vectors.
2433 SmallVector<int> NewMask(Mask);
2434 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2435
2436 APInt KnownUndef, KnownZero;
2437 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2438 APInt Zeroable = KnownUndef | KnownZero;
2439
2440 SDValue Result;
2441 // TODO: Add more comparison patterns.
2442 if (V2.isUndef()) {
2443 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2444 Subtarget)))
2445 return Result;
2446 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2447 Subtarget)))
2448 return Result;
2449 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2450 return Result;
2451 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2452 V1, V2, DAG)))
2453 return Result;
2454
2455 // TODO: The commented-out assignment below may be enabled in the future to
2456 // better match the patterns for instruction selection.
2457 /* V2 = V1; */
2458 }
2459
2460 // It is recommended not to change the pattern comparison order for better
2461 // performance.
2462 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2463 return Result;
2464 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2465 return Result;
2466 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2467 return Result;
2468 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2469 return Result;
2470 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2471 return Result;
2472 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2473 return Result;
2474 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2475 Subtarget, Zeroable)))
2476 return Result;
2477 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2478 Subtarget)))
2479 return Result;
2480 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2481 return NewShuffle;
2482 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2483 return Result;
2484
2485 return SDValue();
2486}
2487
2488SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2489 SelectionDAG &DAG) const {
2490 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2491 ArrayRef<int> OrigMask = SVOp->getMask();
2492 SDValue V1 = Op.getOperand(0);
2493 SDValue V2 = Op.getOperand(1);
2494 MVT VT = Op.getSimpleValueType();
2495 int NumElements = VT.getVectorNumElements();
2496 SDLoc DL(Op);
2497
2498 bool V1IsUndef = V1.isUndef();
2499 bool V2IsUndef = V2.isUndef();
2500 if (V1IsUndef && V2IsUndef)
2501 return DAG.getUNDEF(VT);
2502
2503 // When we create a shuffle node we put the UNDEF node to second operand,
2504 // but in some cases the first operand may be transformed to UNDEF.
2505 // In this case we should just commute the node.
2506 if (V1IsUndef)
2507 return DAG.getCommutedVectorShuffle(*SVOp);
2508
2509 // Check for non-undef masks pointing at an undef vector and make the masks
2510 // undef as well. This makes it easier to match the shuffle based solely on
2511 // the mask.
2512 if (V2IsUndef &&
2513 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2514 SmallVector<int, 8> NewMask(OrigMask);
2515 for (int &M : NewMask)
2516 if (M >= NumElements)
2517 M = -1;
2518 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2519 }
2520
2521 // Check for illegal shuffle mask element index values.
2522 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2523 (void)MaskUpperLimit;
2524 assert(llvm::all_of(OrigMask,
2525 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2526 "Out of bounds shuffle index");
2527
2528 // For each vector width, delegate to a specialized lowering routine.
2529 if (VT.is128BitVector())
2530 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2531
2532 if (VT.is256BitVector())
2533 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2534
2535 return SDValue();
2536}
2537
2538SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2539 SelectionDAG &DAG) const {
2540 // Custom lower to ensure the libcall return is passed in an FPR on hard
2541 // float ABIs.
2542 SDLoc DL(Op);
2543 MakeLibCallOptions CallOptions;
2544 SDValue Op0 = Op.getOperand(0);
2545 SDValue Chain = SDValue();
2546 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2547 SDValue Res;
2548 std::tie(Res, Chain) =
2549 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2550 if (Subtarget.is64Bit())
2551 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2552 return DAG.getBitcast(MVT::i32, Res);
2553}
2554
2555SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2556 SelectionDAG &DAG) const {
2557 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2558 // float ABIs.
2559 SDLoc DL(Op);
2560 MakeLibCallOptions CallOptions;
2561 SDValue Op0 = Op.getOperand(0);
2562 SDValue Chain = SDValue();
2563 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2564 DL, MVT::f32, Op0)
2565 : DAG.getBitcast(MVT::f32, Op0);
2566 SDValue Res;
2567 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2568 CallOptions, DL, Chain);
2569 return Res;
2570}
2571
2572SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2573 SelectionDAG &DAG) const {
2574 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2575 SDLoc DL(Op);
2576 MakeLibCallOptions CallOptions;
2577 RTLIB::Libcall LC =
2578 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2579 SDValue Res =
2580 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2581 if (Subtarget.is64Bit())
2582 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2583 return DAG.getBitcast(MVT::i32, Res);
2584}
2585
2586SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2587 SelectionDAG &DAG) const {
2588 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2589 MVT VT = Op.getSimpleValueType();
2590 SDLoc DL(Op);
2591 Op = DAG.getNode(
2592 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2593 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2594 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2595 DL, MVT::f32, Op)
2596 : DAG.getBitcast(MVT::f32, Op);
2597 if (VT != MVT::f32)
2598 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2599 return Res;
2600}
2601
2602// Lower BUILD_VECTOR as broadcast load (if possible).
2603// For example:
2604// %a = load i8, ptr %ptr
2605// %b = build_vector %a, %a, %a, %a
2606 // is lowered to:
2607// (VLDREPL_B $a0, 0)
2608 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2609 const SDLoc &DL,
2610 SelectionDAG &DAG) {
2611 MVT VT = BVOp->getSimpleValueType(0);
2612 int NumOps = BVOp->getNumOperands();
2613
2614 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2615 "Unsupported vector type for broadcast.");
2616
2617 SDValue IdentitySrc;
2618 bool IsIdentity = true;
2619
2620 for (int i = 0; i != NumOps; i++) {
2621 SDValue Op = BVOp->getOperand(i);
2622 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2623 IsIdentity = false;
2624 break;
2625 }
2626 IdentitySrc = BVOp->getOperand(0);
2627 }
2628
2629 // Make sure that this load is valid and has only one user.
2630 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2631 return SDValue();
2632
2633 auto *LN = cast<LoadSDNode>(IdentitySrc);
2634 auto ExtType = LN->getExtensionType();
2635
2636 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2637 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2638 SDVTList Tys =
2639 LN->isIndexed()
2640 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2641 : DAG.getVTList(VT, MVT::Other);
2642 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2643 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2644 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2645 return BCast;
2646 }
2647 return SDValue();
2648}
2649
2650SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2651 SelectionDAG &DAG) const {
2652 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2653 EVT ResTy = Op->getValueType(0);
2654 unsigned NumElts = ResTy.getVectorNumElements();
2655 SDLoc DL(Op);
2656 APInt SplatValue, SplatUndef;
2657 unsigned SplatBitSize;
2658 bool HasAnyUndefs;
2659 bool IsConstant = false;
2660 bool UseSameConstant = true;
2661 SDValue ConstantValue;
2662 bool Is128Vec = ResTy.is128BitVector();
2663 bool Is256Vec = ResTy.is256BitVector();
2664
2665 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2666 (!Subtarget.hasExtLASX() || !Is256Vec))
2667 return SDValue();
2668
2669 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2670 return Result;
2671
2672 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2673 /*MinSplatBits=*/8) &&
2674 SplatBitSize <= 64) {
2675 // We can only cope with 8, 16, 32, or 64-bit elements.
2676 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2677 SplatBitSize != 64)
2678 return SDValue();
2679
2680 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2681 // We can only handle 64-bit elements that are within
2682 // the signed 10-bit range on 32-bit targets.
2683 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2684 if (!SplatValue.isSignedIntN(10))
2685 return SDValue();
2686 if ((Is128Vec && ResTy == MVT::v4i32) ||
2687 (Is256Vec && ResTy == MVT::v8i32))
2688 return Op;
2689 }
2690
2691 EVT ViaVecTy;
2692
2693 switch (SplatBitSize) {
2694 default:
2695 return SDValue();
2696 case 8:
2697 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2698 break;
2699 case 16:
2700 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2701 break;
2702 case 32:
2703 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2704 break;
2705 case 64:
2706 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2707 break;
2708 }
2709
2710 // SelectionDAG::getConstant will promote SplatValue appropriately.
2711 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2712
2713 // Bitcast to the type we originally wanted.
2714 if (ViaVecTy != ResTy)
2715 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2716
2717 return Result;
2718 }
2719
2720 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2721 return Op;
2722
2723 for (unsigned i = 0; i < NumElts; ++i) {
2724 SDValue Opi = Node->getOperand(i);
2725 if (isIntOrFPConstant(Opi)) {
2726 IsConstant = true;
2727 if (!ConstantValue.getNode())
2728 ConstantValue = Opi;
2729 else if (ConstantValue != Opi)
2730 UseSameConstant = false;
2731 }
2732 }
2733
2734 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2735 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2736 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2737 for (unsigned i = 0; i < NumElts; ++i) {
2738 SDValue Opi = Node->getOperand(i);
2739 if (!isIntOrFPConstant(Opi))
2740 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2741 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2742 }
2743 return Result;
2744 }
2745
2746 if (!IsConstant) {
2747 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2748 // The resulting code is the same length as the expansion, but it doesn't
2749 // use memory operations.
2750 assert(ResTy.isVector());
2751
2752 SDValue Op0 = Node->getOperand(0);
2753 SDValue Vector = DAG.getUNDEF(ResTy);
2754
2755 if (!Op0.isUndef())
2756 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2757 for (unsigned i = 1; i < NumElts; ++i) {
2758 SDValue Opi = Node->getOperand(i);
2759 if (Opi.isUndef())
2760 continue;
2761 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2762 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2763 }
2764 return Vector;
2765 }
2766
2767 return SDValue();
2768}
2769
2770SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2771 SelectionDAG &DAG) const {
2772 SDLoc DL(Op);
2773 MVT ResVT = Op.getSimpleValueType();
2774 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2775
2776 unsigned NumOperands = Op.getNumOperands();
2777 unsigned NumFreezeUndef = 0;
2778 unsigned NumZero = 0;
2779 unsigned NumNonZero = 0;
2780 unsigned NonZeros = 0;
2781 SmallSet<SDValue, 4> Undefs;
2782 for (unsigned i = 0; i != NumOperands; ++i) {
2783 SDValue SubVec = Op.getOperand(i);
2784 if (SubVec.isUndef())
2785 continue;
2786 if (ISD::isFreezeUndef(SubVec.getNode())) {
2787 // If the freeze(undef) has multiple uses then we must fold to zero.
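// (Every use of a given freeze(undef) must observe the same value, so a
// multi-use node cannot be treated as a fresh undef here; it is recorded in
// Undefs and replaced with an all-zero vector further below.)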
2788 if (SubVec.hasOneUse()) {
2789 ++NumFreezeUndef;
2790 } else {
2791 ++NumZero;
2792 Undefs.insert(SubVec);
2793 }
2794 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2795 ++NumZero;
2796 else {
2797 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2798 NonZeros |= 1 << i;
2799 ++NumNonZero;
2800 }
2801 }
2802
2803 // If we have more than 2 non-zeros, build each half separately.
2804 if (NumNonZero > 2) {
2805 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2806 ArrayRef<SDUse> Ops = Op->ops();
2807 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2808 Ops.slice(0, NumOperands / 2));
2809 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2810 Ops.slice(NumOperands / 2));
2811 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2812 }
2813
2814 // Otherwise, build it up through insert_subvectors.
2815 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2816 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2817 : DAG.getUNDEF(ResVT));
2818
2819 // Replace Undef operands with ZeroVector.
2820 for (SDValue U : Undefs)
2821 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2822
2823 MVT SubVT = Op.getOperand(0).getSimpleValueType();
2824 unsigned NumSubElems = SubVT.getVectorNumElements();
2825 for (unsigned i = 0; i != NumOperands; ++i) {
2826 if ((NonZeros & (1 << i)) == 0)
2827 continue;
2828
2829 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2830 DAG.getVectorIdxConstant(i * NumSubElems, DL));
2831 }
2832
2833 return Vec;
2834}
2835
2836SDValue
2837LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2838 SelectionDAG &DAG) const {
2839 MVT EltVT = Op.getSimpleValueType();
2840 SDValue Vec = Op->getOperand(0);
2841 EVT VecTy = Vec->getValueType(0);
2842 SDValue Idx = Op->getOperand(1);
2843 SDLoc DL(Op);
2844 MVT GRLenVT = Subtarget.getGRLenVT();
2845
2846 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
2847
2848 if (isa<ConstantSDNode>(Idx))
2849 return Op;
2850
2851 switch (VecTy.getSimpleVT().SimpleTy) {
2852 default:
2853 llvm_unreachable("Unexpected type");
2854 case MVT::v32i8:
2855 case MVT::v16i16:
2856 case MVT::v4i64:
2857 case MVT::v4f64: {
2858 // Extract the high-half subvector and place it in the low half of a new
2859 // vector. It doesn't matter what the high half of the new vector is.
2860 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
2861 SDValue VecHi =
2862 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
2863 SDValue TmpVec =
2864 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
2865 VecHi, DAG.getConstant(0, DL, GRLenVT));
2866
2867 // Shuffle the original Vec and TmpVec using MaskVec; the lowest element of
2868 // MaskVec is Idx and the rest do not matter. ResVec[0] will hold the
2869 // desired element.
2870 SDValue IdxCp =
2871 Subtarget.is64Bit()
2872 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
2873 : DAG.getBitcast(MVT::f32, Idx);
2874 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
2875 SDValue MaskVec =
2876 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
2877 SDValue ResVec =
2878 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
2879
2880 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
2881 DAG.getConstant(0, DL, GRLenVT));
2882 }
2883 case MVT::v8i32:
2884 case MVT::v8f32: {
2885 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
2886 SDValue SplatValue =
2887 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
2888
2889 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
2890 DAG.getConstant(0, DL, GRLenVT));
2891 }
2892 }
2893}
2894
2895SDValue
2896LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2897 SelectionDAG &DAG) const {
2898 MVT VT = Op.getSimpleValueType();
2899 MVT EltVT = VT.getVectorElementType();
2900 unsigned NumElts = VT.getVectorNumElements();
2901 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
2902 SDLoc DL(Op);
2903 SDValue Op0 = Op.getOperand(0);
2904 SDValue Op1 = Op.getOperand(1);
2905 SDValue Op2 = Op.getOperand(2);
2906
2907 if (isa<ConstantSDNode>(Op2))
2908 return Op;
2909
2910 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
2911 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
2912
2913 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
2914 return SDValue();
2915
2916 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
2917 SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
2918
2919 SmallVector<SDValue, 32> RawIndices;
2920 for (unsigned i = 0; i < NumElts; ++i)
2921 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2922 SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
2923
2924 // insert vec, elt, idx
2925 // =>
2926 // select (splatidx == {0,1,2...}) ? splatelt : vec
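// E.g. for v4i32 with idx = 2: splatidx = <2, 2, 2, 2> and indices =
// <0, 1, 2, 3>, so the SETEQ mask is <0, 0, -1, 0> and only lane 2 receives
// the new element.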
2927 SDValue SelectCC =
2928 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
2929 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
2930}
2931
2932SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2933 SelectionDAG &DAG) const {
2934 SDLoc DL(Op);
2935 SyncScope::ID FenceSSID =
2936 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
2937
2938 // singlethread fences only synchronize with signal handlers on the same
2939 // thread and thus only need to preserve instruction order, not actually
2940 // enforce memory ordering.
2941 if (FenceSSID == SyncScope::SingleThread)
2942 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
2943 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
2944
2945 return Op;
2946}
2947
2948SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
2949 SelectionDAG &DAG) const {
2950
2951 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
2952 DAG.getContext()->emitError(
2953 "On LA64, only 64-bit registers can be written.");
2954 return Op.getOperand(0);
2955 }
2956
2957 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
2958 DAG.getContext()->emitError(
2959 "On LA32, only 32-bit registers can be written.");
2960 return Op.getOperand(0);
2961 }
2962
2963 return Op;
2964}
2965
2966SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
2967 SelectionDAG &DAG) const {
2968 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
2969 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
2970 "be a constant integer");
2971 return SDValue();
2972 }
2973
2974 MachineFunction &MF = DAG.getMachineFunction();
2975 MF.getFrameInfo().setFrameAddressIsTaken(true);
2976 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
2977 EVT VT = Op.getValueType();
2978 SDLoc DL(Op);
2979 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2980 unsigned Depth = Op.getConstantOperandVal(0);
2981 int GRLenInBytes = Subtarget.getGRLen() / 8;
2982
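// Walk up the chain of saved frame pointers. The caller's frame pointer is
// assumed to be spilled at FrameAddr - 2 * GRLenInBytes (just below the saved
// return address), matching LoongArch's standard frame layout.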
2983 while (Depth--) {
2984 int Offset = -(GRLenInBytes * 2);
2985 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2986 DAG.getSignedConstant(Offset, DL, VT));
2987 FrameAddr =
2988 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2989 }
2990 return FrameAddr;
2991}
2992
2993SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
2994 SelectionDAG &DAG) const {
2995 // We currently only support lowering the return address for the current frame.
2996 if (Op.getConstantOperandVal(0) != 0) {
2997 DAG.getContext()->emitError(
2998 "return address can only be determined for the current frame");
2999 return SDValue();
3000 }
3001
3002 MachineFunction &MF = DAG.getMachineFunction();
3003 MF.getFrameInfo().setReturnAddressIsTaken(true);
3004 MVT GRLenVT = Subtarget.getGRLenVT();
3005
3006 // Return the value of the return address register, marking it an implicit
3007 // live-in.
3008 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3009 getRegClassFor(GRLenVT));
3010 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3011}
3012
3013SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3014 SelectionDAG &DAG) const {
3015 MachineFunction &MF = DAG.getMachineFunction();
3016 auto Size = Subtarget.getGRLen() / 8;
3017 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3018 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3019}
3020
3021SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3022 SelectionDAG &DAG) const {
3023 MachineFunction &MF = DAG.getMachineFunction();
3024 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3025
3026 SDLoc DL(Op);
3027 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3028 getPointerTy(MF.getDataLayout()));
3029
3030 // vastart just stores the address of the VarArgsFrameIndex slot into the
3031 // memory location argument.
3032 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3033 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3034 MachinePointerInfo(SV));
3035}
3036
3037SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3038 SelectionDAG &DAG) const {
3039 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3040 !Subtarget.hasBasicD() && "unexpected target features");
3041
3042 SDLoc DL(Op);
3043 SDValue Op0 = Op.getOperand(0);
3044 if (Op0->getOpcode() == ISD::AND) {
3045 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3046 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3047 return Op;
3048 }
3049
3050 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3051 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3052 Op0.getConstantOperandVal(2) == UINT64_C(0))
3053 return Op;
3054
3055 if (Op0.getOpcode() == ISD::AssertZext &&
3056 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3057 return Op;
3058
3059 EVT OpVT = Op0.getValueType();
3060 EVT RetVT = Op.getValueType();
3061 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3062 MakeLibCallOptions CallOptions;
3063 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3064 SDValue Chain = SDValue();
3065 SDValue Result;
3066 std::tie(Result, Chain) =
3067 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3068 return Result;
3069}
3070
3071SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3072 SelectionDAG &DAG) const {
3073 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3074 !Subtarget.hasBasicD() && "unexpected target features");
3075
3076 SDLoc DL(Op);
3077 SDValue Op0 = Op.getOperand(0);
3078
3079 if ((Op0.getOpcode() == ISD::AssertSext ||
3080 Op0.getOpcode() == ISD::AssertZext) &&
3081 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3082 return Op;
3083
3084 EVT OpVT = Op0.getValueType();
3085 EVT RetVT = Op.getValueType();
3086 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3087 MakeLibCallOptions CallOptions;
3088 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3089 SDValue Chain = SDValue();
3090 SDValue Result;
3091 std::tie(Result, Chain) =
3092 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3093 return Result;
3094}
3095
3096SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3097 SelectionDAG &DAG) const {
3098
3099 SDLoc DL(Op);
3100 EVT VT = Op.getValueType();
3101 SDValue Op0 = Op.getOperand(0);
3102 EVT Op0VT = Op0.getValueType();
3103
3104 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3105 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3106 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3107 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3108 }
3109 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3110 SDValue Lo, Hi;
3111 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3112 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3113 }
3114 return Op;
3115}
3116
3117SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3118 SelectionDAG &DAG) const {
3119
3120 SDLoc DL(Op);
3121 SDValue Op0 = Op.getOperand(0);
3122
3123 if (Op0.getValueType() == MVT::f16)
3124 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3125
3126 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3127 !Subtarget.hasBasicD()) {
3128 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3129 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3130 }
3131
3132 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3133 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3134 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3135}
3136
3137 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3138 SelectionDAG &DAG, unsigned Flags) {
3139 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3140}
3141
3142 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3143 SelectionDAG &DAG, unsigned Flags) {
3144 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3145 Flags);
3146}
3147
3148 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3149 SelectionDAG &DAG, unsigned Flags) {
3150 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3151 N->getOffset(), Flags);
3152}
3153
3154 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3155 SelectionDAG &DAG, unsigned Flags) {
3156 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3157}
3158
3159template <class NodeTy>
3160SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3161 CodeModel::Model M,
3162 bool IsLocal) const {
3163 SDLoc DL(N);
3164 EVT Ty = getPointerTy(DAG.getDataLayout());
3165 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3166 SDValue Load;
3167
3168 switch (M) {
3169 default:
3170 report_fatal_error("Unsupported code model");
3171
3172 case CodeModel::Large: {
3173 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3174
3175 // This is not actually used, but is necessary for successfully matching
3176 // the PseudoLA_*_LARGE nodes.
3177 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3178 if (IsLocal) {
3179 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3180 // eventually becomes the desired 5-insn code sequence.
3181 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3182 Tmp, Addr),
3183 0);
3184 } else {
3185 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3186 // eventually becomes the desired 5-insn code sequence.
3187 Load = SDValue(
3188 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3189 0);
3190 }
3191 break;
3192 }
3193
3194 case CodeModel::Small:
3195 case CodeModel::Medium:
3196 if (IsLocal) {
3197 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3198 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3199 Load = SDValue(
3200 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3201 } else {
3202 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3203 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3204 Load =
3205 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3206 }
3207 }
3208
3209 if (!IsLocal) {
3210 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3211 MachineFunction &MF = DAG.getMachineFunction();
3212 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3213 MachinePointerInfo::getGOT(MF),
3214 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3215 MachineMemOperand::MOInvariant,
3216 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3217 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3218 }
3219
3220 return Load;
3221}
3222
3223SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3224 SelectionDAG &DAG) const {
3225 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3226 DAG.getTarget().getCodeModel());
3227}
3228
3229SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3230 SelectionDAG &DAG) const {
3231 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3232 DAG.getTarget().getCodeModel());
3233}
3234
3235SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3236 SelectionDAG &DAG) const {
3237 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3238 DAG.getTarget().getCodeModel());
3239}
3240
3241SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3242 SelectionDAG &DAG) const {
3243 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3244 assert(N->getOffset() == 0 && "unexpected offset in global node");
3245 auto CM = DAG.getTarget().getCodeModel();
3246 const GlobalValue *GV = N->getGlobal();
3247
3248 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3249 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3250 CM = *GCM;
3251 }
3252
3253 return getAddr(N, DAG, CM, GV->isDSOLocal());
3254}
3255
3256SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3257 SelectionDAG &DAG,
3258 unsigned Opc, bool UseGOT,
3259 bool Large) const {
3260 SDLoc DL(N);
3261 EVT Ty = getPointerTy(DAG.getDataLayout());
3262 MVT GRLenVT = Subtarget.getGRLenVT();
3263
3264 // This is not actually used, but is necessary for successfully matching the
3265 // PseudoLA_*_LARGE nodes.
3266 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3267 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3268
3269 // Only IE needs an extra argument for large code model.
3270 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3271 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3272 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3273
3274 // If it is LE for normal/medium code model, the add tp operation will occur
3275 // during the pseudo-instruction expansion.
3276 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3277 return Offset;
3278
3279 if (UseGOT) {
3280 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3281 MachineFunction &MF = DAG.getMachineFunction();
3282 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3283 MachinePointerInfo::getGOT(MF),
3284 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3285 MachineMemOperand::MOInvariant,
3286 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3287 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3288 }
3289
3290 // Add the thread pointer.
3291 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3292 DAG.getRegister(LoongArch::R2, GRLenVT));
3293}
3294
3295SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3296 SelectionDAG &DAG,
3297 unsigned Opc,
3298 bool Large) const {
3299 SDLoc DL(N);
3300 EVT Ty = getPointerTy(DAG.getDataLayout());
3301 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3302
3303 // This is not actually used, but is necessary for successfully matching the
3304 // PseudoLA_*_LARGE nodes.
3305 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3306
3307 // Use a PC-relative addressing mode to access the dynamic GOT address.
3308 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3309 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3310 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3311
3312 // Prepare argument list to generate call.
3313 TargetLowering::ArgListTy Args;
3314 Args.emplace_back(Load, CallTy);
3315
3316 // Setup call to __tls_get_addr.
3317 TargetLowering::CallLoweringInfo CLI(DAG);
3318 CLI.setDebugLoc(DL)
3319 .setChain(DAG.getEntryNode())
3320 .setLibCallee(CallingConv::C, CallTy,
3321 DAG.getExternalSymbol("__tls_get_addr", Ty),
3322 std::move(Args));
3323
3324 return LowerCallTo(CLI).first;
3325}
3326
3327SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3328 SelectionDAG &DAG, unsigned Opc,
3329 bool Large) const {
3330 SDLoc DL(N);
3331 EVT Ty = getPointerTy(DAG.getDataLayout());
3332 const GlobalValue *GV = N->getGlobal();
3333
3334 // This is not actually used, but is necessary for successfully matching the
3335 // PseudoLA_*_LARGE nodes.
3336 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3337
3338 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3339 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3340 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3341 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3342 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3343}
3344
3345SDValue
3346LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3347 SelectionDAG &DAG) const {
3347 SelectionDAG &DAG) const {
3348 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3349 CallingConv::GHC)
3350 report_fatal_error("In GHC calling convention TLS is not supported");
3351
3352 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3353 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3354
3355 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3356 assert(N->getOffset() == 0 && "unexpected offset in global node");
3357
3358 if (DAG.getTarget().useEmulatedTLS())
3359 reportFatalUsageError("the emulated TLS is prohibited");
3360
3361 bool IsDesc = DAG.getTarget().useTLSDESC();
3362
3363 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3364 case TLSModel::GeneralDynamic:
3365 // In this model, application code calls the dynamic linker function
3366 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3367 // runtime.
3368 if (!IsDesc)
3369 return getDynamicTLSAddr(N, DAG,
3370 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3371 : LoongArch::PseudoLA_TLS_GD,
3372 Large);
3373 break;
3374 case TLSModel::LocalDynamic:
3375 // Same as GeneralDynamic, except for assembly modifiers and relocation
3376 // records.
3377 if (!IsDesc)
3378 return getDynamicTLSAddr(N, DAG,
3379 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3380 : LoongArch::PseudoLA_TLS_LD,
3381 Large);
3382 break;
3383 case TLSModel::InitialExec:
3384 // This model uses the GOT to resolve TLS offsets.
3385 return getStaticTLSAddr(N, DAG,
3386 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3387 : LoongArch::PseudoLA_TLS_IE,
3388 /*UseGOT=*/true, Large);
3389 case TLSModel::LocalExec:
3390 // This model is used when static linking as the TLS offsets are resolved
3391 // during program linking.
3392 //
3393 // This node doesn't need an extra argument for the large code model.
3394 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3395 /*UseGOT=*/false, Large);
3396 }
3397
3398 return getTLSDescAddr(N, DAG,
3399 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3400 : LoongArch::PseudoLA_TLS_DESC,
3401 Large);
3402}
3403
3404template <unsigned N>
3405 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3406 SelectionDAG &DAG, bool IsSigned = false) {
3407 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3408 // Check the ImmArg.
3409 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3410 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3411 DAG.getContext()->emitError(Op->getOperationName(0) +
3412 ": argument out of range.");
3413 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3414 }
3415 return SDValue();
3416}
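// As an illustration of the helper above: checkIntrinsicImmArg<2>(Op, 2, DAG)
// emits a diagnostic and returns an UNDEF node when operand 2 of the intrinsic
// does not fit in an unsigned 2-bit immediate (0..3); otherwise it returns an
// empty SDValue so that lowering proceeds normally.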
3417
3418SDValue
3419LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3420 SelectionDAG &DAG) const {
3421 switch (Op.getConstantOperandVal(0)) {
3422 default:
3423 return SDValue(); // Don't custom lower most intrinsics.
3424 case Intrinsic::thread_pointer: {
3425 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3426 return DAG.getRegister(LoongArch::R2, PtrVT);
3427 }
3428 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3429 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3430 case Intrinsic::loongarch_lsx_vreplvei_d:
3431 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3432 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3433 case Intrinsic::loongarch_lsx_vreplvei_w:
3434 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3435 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3436 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3437 case Intrinsic::loongarch_lasx_xvpickve_d:
3438 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3439 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3440 case Intrinsic::loongarch_lasx_xvinsve0_d:
3441 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3442 case Intrinsic::loongarch_lsx_vsat_b:
3443 case Intrinsic::loongarch_lsx_vsat_bu:
3444 case Intrinsic::loongarch_lsx_vrotri_b:
3445 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3446 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3447 case Intrinsic::loongarch_lsx_vsrlri_b:
3448 case Intrinsic::loongarch_lsx_vsrari_b:
3449 case Intrinsic::loongarch_lsx_vreplvei_h:
3450 case Intrinsic::loongarch_lasx_xvsat_b:
3451 case Intrinsic::loongarch_lasx_xvsat_bu:
3452 case Intrinsic::loongarch_lasx_xvrotri_b:
3453 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3454 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3455 case Intrinsic::loongarch_lasx_xvsrlri_b:
3456 case Intrinsic::loongarch_lasx_xvsrari_b:
3457 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3458 case Intrinsic::loongarch_lasx_xvpickve_w:
3459 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3460 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3461 case Intrinsic::loongarch_lasx_xvinsve0_w:
3462 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3463 case Intrinsic::loongarch_lsx_vsat_h:
3464 case Intrinsic::loongarch_lsx_vsat_hu:
3465 case Intrinsic::loongarch_lsx_vrotri_h:
3466 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3467 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3468 case Intrinsic::loongarch_lsx_vsrlri_h:
3469 case Intrinsic::loongarch_lsx_vsrari_h:
3470 case Intrinsic::loongarch_lsx_vreplvei_b:
3471 case Intrinsic::loongarch_lasx_xvsat_h:
3472 case Intrinsic::loongarch_lasx_xvsat_hu:
3473 case Intrinsic::loongarch_lasx_xvrotri_h:
3474 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3475 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3476 case Intrinsic::loongarch_lasx_xvsrlri_h:
3477 case Intrinsic::loongarch_lasx_xvsrari_h:
3478 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3479 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3480 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3481 case Intrinsic::loongarch_lsx_vsrani_b_h:
3482 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3483 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3484 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3485 case Intrinsic::loongarch_lsx_vssrani_b_h:
3486 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3487 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3488 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3489 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3490 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3491 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3492 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3493 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3494 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3495 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3496 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3497 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3498 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3499 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3500 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3501 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3502 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3503 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3504 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3505 case Intrinsic::loongarch_lsx_vsat_w:
3506 case Intrinsic::loongarch_lsx_vsat_wu:
3507 case Intrinsic::loongarch_lsx_vrotri_w:
3508 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3509 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3510 case Intrinsic::loongarch_lsx_vsrlri_w:
3511 case Intrinsic::loongarch_lsx_vsrari_w:
3512 case Intrinsic::loongarch_lsx_vslei_bu:
3513 case Intrinsic::loongarch_lsx_vslei_hu:
3514 case Intrinsic::loongarch_lsx_vslei_wu:
3515 case Intrinsic::loongarch_lsx_vslei_du:
3516 case Intrinsic::loongarch_lsx_vslti_bu:
3517 case Intrinsic::loongarch_lsx_vslti_hu:
3518 case Intrinsic::loongarch_lsx_vslti_wu:
3519 case Intrinsic::loongarch_lsx_vslti_du:
3520 case Intrinsic::loongarch_lsx_vbsll_v:
3521 case Intrinsic::loongarch_lsx_vbsrl_v:
3522 case Intrinsic::loongarch_lasx_xvsat_w:
3523 case Intrinsic::loongarch_lasx_xvsat_wu:
3524 case Intrinsic::loongarch_lasx_xvrotri_w:
3525 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3526 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3527 case Intrinsic::loongarch_lasx_xvsrlri_w:
3528 case Intrinsic::loongarch_lasx_xvsrari_w:
3529 case Intrinsic::loongarch_lasx_xvslei_bu:
3530 case Intrinsic::loongarch_lasx_xvslei_hu:
3531 case Intrinsic::loongarch_lasx_xvslei_wu:
3532 case Intrinsic::loongarch_lasx_xvslei_du:
3533 case Intrinsic::loongarch_lasx_xvslti_bu:
3534 case Intrinsic::loongarch_lasx_xvslti_hu:
3535 case Intrinsic::loongarch_lasx_xvslti_wu:
3536 case Intrinsic::loongarch_lasx_xvslti_du:
3537 case Intrinsic::loongarch_lasx_xvbsll_v:
3538 case Intrinsic::loongarch_lasx_xvbsrl_v:
3539 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3540 case Intrinsic::loongarch_lsx_vseqi_b:
3541 case Intrinsic::loongarch_lsx_vseqi_h:
3542 case Intrinsic::loongarch_lsx_vseqi_w:
3543 case Intrinsic::loongarch_lsx_vseqi_d:
3544 case Intrinsic::loongarch_lsx_vslei_b:
3545 case Intrinsic::loongarch_lsx_vslei_h:
3546 case Intrinsic::loongarch_lsx_vslei_w:
3547 case Intrinsic::loongarch_lsx_vslei_d:
3548 case Intrinsic::loongarch_lsx_vslti_b:
3549 case Intrinsic::loongarch_lsx_vslti_h:
3550 case Intrinsic::loongarch_lsx_vslti_w:
3551 case Intrinsic::loongarch_lsx_vslti_d:
3552 case Intrinsic::loongarch_lasx_xvseqi_b:
3553 case Intrinsic::loongarch_lasx_xvseqi_h:
3554 case Intrinsic::loongarch_lasx_xvseqi_w:
3555 case Intrinsic::loongarch_lasx_xvseqi_d:
3556 case Intrinsic::loongarch_lasx_xvslei_b:
3557 case Intrinsic::loongarch_lasx_xvslei_h:
3558 case Intrinsic::loongarch_lasx_xvslei_w:
3559 case Intrinsic::loongarch_lasx_xvslei_d:
3560 case Intrinsic::loongarch_lasx_xvslti_b:
3561 case Intrinsic::loongarch_lasx_xvslti_h:
3562 case Intrinsic::loongarch_lasx_xvslti_w:
3563 case Intrinsic::loongarch_lasx_xvslti_d:
3564 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3565 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3566 case Intrinsic::loongarch_lsx_vsrani_h_w:
3567 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3568 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3569 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3570 case Intrinsic::loongarch_lsx_vssrani_h_w:
3571 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3572 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3573 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3574 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3575 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3576 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3577 case Intrinsic::loongarch_lsx_vfrstpi_b:
3578 case Intrinsic::loongarch_lsx_vfrstpi_h:
3579 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3580 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3581 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3582 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3583 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3584 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3585 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3586 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3587 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3588 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3589 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3590 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3591 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3592 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3593 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3594 case Intrinsic::loongarch_lsx_vsat_d:
3595 case Intrinsic::loongarch_lsx_vsat_du:
3596 case Intrinsic::loongarch_lsx_vrotri_d:
3597 case Intrinsic::loongarch_lsx_vsrlri_d:
3598 case Intrinsic::loongarch_lsx_vsrari_d:
3599 case Intrinsic::loongarch_lasx_xvsat_d:
3600 case Intrinsic::loongarch_lasx_xvsat_du:
3601 case Intrinsic::loongarch_lasx_xvrotri_d:
3602 case Intrinsic::loongarch_lasx_xvsrlri_d:
3603 case Intrinsic::loongarch_lasx_xvsrari_d:
3604 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3605 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3606 case Intrinsic::loongarch_lsx_vsrani_w_d:
3607 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3608 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3609 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3610 case Intrinsic::loongarch_lsx_vssrani_w_d:
3611 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3612 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3613 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3614 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3615 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3616 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3617 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3618 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3619 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3620 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3621 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3622 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3623 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3624 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3625 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3626 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3627 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3628 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3629 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3630 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3631 case Intrinsic::loongarch_lsx_vsrani_d_q:
3632 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3633 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3634 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3635 case Intrinsic::loongarch_lsx_vssrani_d_q:
3636 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3637 case Intrinsic::loongarch_lsx_vssrani_du_q:
3638 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3639 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3640 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3641 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3642 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3643 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3644 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3645 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3646 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3647 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3648 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3649 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3650 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3651 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3652 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3653 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3654 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3655 case Intrinsic::loongarch_lsx_vnori_b:
3656 case Intrinsic::loongarch_lsx_vshuf4i_b:
3657 case Intrinsic::loongarch_lsx_vshuf4i_h:
3658 case Intrinsic::loongarch_lsx_vshuf4i_w:
3659 case Intrinsic::loongarch_lasx_xvnori_b:
3660 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3661 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3662 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3663 case Intrinsic::loongarch_lasx_xvpermi_d:
3664 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3665 case Intrinsic::loongarch_lsx_vshuf4i_d:
3666 case Intrinsic::loongarch_lsx_vpermi_w:
3667 case Intrinsic::loongarch_lsx_vbitseli_b:
3668 case Intrinsic::loongarch_lsx_vextrins_b:
3669 case Intrinsic::loongarch_lsx_vextrins_h:
3670 case Intrinsic::loongarch_lsx_vextrins_w:
3671 case Intrinsic::loongarch_lsx_vextrins_d:
3672 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3673 case Intrinsic::loongarch_lasx_xvpermi_w:
3674 case Intrinsic::loongarch_lasx_xvpermi_q:
3675 case Intrinsic::loongarch_lasx_xvbitseli_b:
3676 case Intrinsic::loongarch_lasx_xvextrins_b:
3677 case Intrinsic::loongarch_lasx_xvextrins_h:
3678 case Intrinsic::loongarch_lasx_xvextrins_w:
3679 case Intrinsic::loongarch_lasx_xvextrins_d:
3680 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3681 case Intrinsic::loongarch_lsx_vrepli_b:
3682 case Intrinsic::loongarch_lsx_vrepli_h:
3683 case Intrinsic::loongarch_lsx_vrepli_w:
3684 case Intrinsic::loongarch_lsx_vrepli_d:
3685 case Intrinsic::loongarch_lasx_xvrepli_b:
3686 case Intrinsic::loongarch_lasx_xvrepli_h:
3687 case Intrinsic::loongarch_lasx_xvrepli_w:
3688 case Intrinsic::loongarch_lasx_xvrepli_d:
3689 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3690 case Intrinsic::loongarch_lsx_vldi:
3691 case Intrinsic::loongarch_lasx_xvldi:
3692 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3693 }
3694}
3695
3696 // Helper function that emits an error message for intrinsics with a chain and
3697 // returns the merged values of an UNDEF and the chain.
3698 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3699 StringRef ErrorMsg,
3700 SelectionDAG &DAG) {
3701 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3702 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3703 SDLoc(Op));
3704}
3705
3706SDValue
3707LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3708 SelectionDAG &DAG) const {
3709 SDLoc DL(Op);
3710 MVT GRLenVT = Subtarget.getGRLenVT();
3711 EVT VT = Op.getValueType();
3712 SDValue Chain = Op.getOperand(0);
3713 const StringRef ErrorMsgOOR = "argument out of range";
3714 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3715 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3716
3717 switch (Op.getConstantOperandVal(1)) {
3718 default:
3719 return Op;
3720 case Intrinsic::loongarch_crc_w_b_w:
3721 case Intrinsic::loongarch_crc_w_h_w:
3722 case Intrinsic::loongarch_crc_w_w_w:
3723 case Intrinsic::loongarch_crc_w_d_w:
3724 case Intrinsic::loongarch_crcc_w_b_w:
3725 case Intrinsic::loongarch_crcc_w_h_w:
3726 case Intrinsic::loongarch_crcc_w_w_w:
3727 case Intrinsic::loongarch_crcc_w_d_w:
3728 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3729 case Intrinsic::loongarch_csrrd_w:
3730 case Intrinsic::loongarch_csrrd_d: {
3731 unsigned Imm = Op.getConstantOperandVal(2);
3732 return !isUInt<14>(Imm)
3733 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3734 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3735 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3736 }
3737 case Intrinsic::loongarch_csrwr_w:
3738 case Intrinsic::loongarch_csrwr_d: {
3739 unsigned Imm = Op.getConstantOperandVal(3);
3740 return !isUInt<14>(Imm)
3741 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3742 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3743 {Chain, Op.getOperand(2),
3744 DAG.getConstant(Imm, DL, GRLenVT)});
3745 }
3746 case Intrinsic::loongarch_csrxchg_w:
3747 case Intrinsic::loongarch_csrxchg_d: {
3748 unsigned Imm = Op.getConstantOperandVal(4);
3749 return !isUInt<14>(Imm)
3750 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3751 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3752 {Chain, Op.getOperand(2), Op.getOperand(3),
3753 DAG.getConstant(Imm, DL, GRLenVT)});
3754 }
3755 case Intrinsic::loongarch_iocsrrd_d: {
3756 return DAG.getNode(
3757 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3758 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3759 }
3760#define IOCSRRD_CASE(NAME, NODE) \
3761 case Intrinsic::loongarch_##NAME: { \
3762 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3763 {Chain, Op.getOperand(2)}); \
3764 }
3765 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3766 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3767 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3768#undef IOCSRRD_CASE
3769 case Intrinsic::loongarch_cpucfg: {
3770 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3771 {Chain, Op.getOperand(2)});
3772 }
3773 case Intrinsic::loongarch_lddir_d: {
3774 unsigned Imm = Op.getConstantOperandVal(3);
3775 return !isUInt<8>(Imm)
3776 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3777 : Op;
3778 }
3779 case Intrinsic::loongarch_movfcsr2gr: {
3780 if (!Subtarget.hasBasicF())
3781 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3782 unsigned Imm = Op.getConstantOperandVal(2);
3783 return !isUInt<2>(Imm)
3784 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3785 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3786 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3787 }
3788 case Intrinsic::loongarch_lsx_vld:
3789 case Intrinsic::loongarch_lsx_vldrepl_b:
3790 case Intrinsic::loongarch_lasx_xvld:
3791 case Intrinsic::loongarch_lasx_xvldrepl_b:
3792 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3793 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3794 : SDValue();
3795 case Intrinsic::loongarch_lsx_vldrepl_h:
3796 case Intrinsic::loongarch_lasx_xvldrepl_h:
3797 return !isShiftedInt<11, 1>(
3798 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3800 Op, "argument out of range or not a multiple of 2", DAG)
3801 : SDValue();
3802 case Intrinsic::loongarch_lsx_vldrepl_w:
3803 case Intrinsic::loongarch_lasx_xvldrepl_w:
3804 return !isShiftedInt<10, 2>(
3805 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3807 Op, "argument out of range or not a multiple of 4", DAG)
3808 : SDValue();
3809 case Intrinsic::loongarch_lsx_vldrepl_d:
3810 case Intrinsic::loongarch_lasx_xvldrepl_d:
3811 return !isShiftedInt<9, 3>(
3812 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3814 Op, "argument out of range or not a multiple of 8", DAG)
3815 : SDValue();
3816 }
3817}
3818
3819 // Helper function that emits an error message for intrinsics with a void return
3820 // value and returns the chain.
3821 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3822 SelectionDAG &DAG) {
3823
3824 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3825 return Op.getOperand(0);
3826}
3827
3828SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3829 SelectionDAG &DAG) const {
3830 SDLoc DL(Op);
3831 MVT GRLenVT = Subtarget.getGRLenVT();
3832 SDValue Chain = Op.getOperand(0);
3833 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3834 SDValue Op2 = Op.getOperand(2);
3835 const StringRef ErrorMsgOOR = "argument out of range";
3836 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3837 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3838 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3839
3840 switch (IntrinsicEnum) {
3841 default:
3842 // TODO: Add more Intrinsics.
3843 return SDValue();
3844 case Intrinsic::loongarch_cacop_d:
3845 case Intrinsic::loongarch_cacop_w: {
3846 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3847 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3848 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3849 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3850 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3851 unsigned Imm1 = Op2->getAsZExtVal();
3852 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3853 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3854 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3855 return Op;
3856 }
3857 case Intrinsic::loongarch_dbar: {
3858 unsigned Imm = Op2->getAsZExtVal();
3859 return !isUInt<15>(Imm)
3860 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3861 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3862 DAG.getConstant(Imm, DL, GRLenVT));
3863 }
3864 case Intrinsic::loongarch_ibar: {
3865 unsigned Imm = Op2->getAsZExtVal();
3866 return !isUInt<15>(Imm)
3867 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3868 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3869 DAG.getConstant(Imm, DL, GRLenVT));
3870 }
3871 case Intrinsic::loongarch_break: {
3872 unsigned Imm = Op2->getAsZExtVal();
3873 return !isUInt<15>(Imm)
3874 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3875 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3876 DAG.getConstant(Imm, DL, GRLenVT));
3877 }
3878 case Intrinsic::loongarch_movgr2fcsr: {
3879 if (!Subtarget.hasBasicF())
3880 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3881 unsigned Imm = Op2->getAsZExtVal();
3882 return !isUInt<2>(Imm)
3883 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3884 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3885 DAG.getConstant(Imm, DL, GRLenVT),
3886 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3887 Op.getOperand(3)));
3888 }
3889 case Intrinsic::loongarch_syscall: {
3890 unsigned Imm = Op2->getAsZExtVal();
3891 return !isUInt<15>(Imm)
3892 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3893 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3894 DAG.getConstant(Imm, DL, GRLenVT));
3895 }
3896#define IOCSRWR_CASE(NAME, NODE) \
3897 case Intrinsic::loongarch_##NAME: { \
3898 SDValue Op3 = Op.getOperand(3); \
3899 return Subtarget.is64Bit() \
3900 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
3901 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3902 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
3903 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
3904 Op3); \
3905 }
3906 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3907 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3908 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3909#undef IOCSRWR_CASE
3910 case Intrinsic::loongarch_iocsrwr_d: {
3911 return !Subtarget.is64Bit()
3912 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3913 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
3914 Op2,
3915 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3916 Op.getOperand(3)));
3917 }
3918#define ASRT_LE_GT_CASE(NAME) \
3919 case Intrinsic::loongarch_##NAME: { \
3920 return !Subtarget.is64Bit() \
3921 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
3922 : Op; \
3923 }
3924 ASRT_LE_GT_CASE(asrtle_d)
3925 ASRT_LE_GT_CASE(asrtgt_d)
3926#undef ASRT_LE_GT_CASE
3927 case Intrinsic::loongarch_ldpte_d: {
3928 unsigned Imm = Op.getConstantOperandVal(3);
3929 return !Subtarget.is64Bit()
3930 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3931 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3932 : Op;
3933 }
3934 case Intrinsic::loongarch_lsx_vst:
3935 case Intrinsic::loongarch_lasx_xvst:
3936 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
3937 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3938 : SDValue();
3939 case Intrinsic::loongarch_lasx_xvstelm_b:
3940 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3941 !isUInt<5>(Op.getConstantOperandVal(5)))
3942 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3943 : SDValue();
3944 case Intrinsic::loongarch_lsx_vstelm_b:
3945 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3946 !isUInt<4>(Op.getConstantOperandVal(5)))
3947 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3948 : SDValue();
3949 case Intrinsic::loongarch_lasx_xvstelm_h:
3950 return (!isShiftedInt<8, 1>(
3951 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3952 !isUInt<4>(Op.getConstantOperandVal(5)))
3954 Op, "argument out of range or not a multiple of 2", DAG)
3955 : SDValue();
3956 case Intrinsic::loongarch_lsx_vstelm_h:
3957 return (!isShiftedInt<8, 1>(
3958 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3959 !isUInt<3>(Op.getConstantOperandVal(5)))
3961 Op, "argument out of range or not a multiple of 2", DAG)
3962 : SDValue();
3963 case Intrinsic::loongarch_lasx_xvstelm_w:
3964 return (!isShiftedInt<8, 2>(
3965 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3966 !isUInt<3>(Op.getConstantOperandVal(5)))
3968 Op, "argument out of range or not a multiple of 4", DAG)
3969 : SDValue();
3970 case Intrinsic::loongarch_lsx_vstelm_w:
3971 return (!isShiftedInt<8, 2>(
3972 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3973 !isUInt<2>(Op.getConstantOperandVal(5)))
3975 Op, "argument out of range or not a multiple of 4", DAG)
3976 : SDValue();
3977 case Intrinsic::loongarch_lasx_xvstelm_d:
3978 return (!isShiftedInt<8, 3>(
3979 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3980 !isUInt<2>(Op.getConstantOperandVal(5)))
3982 Op, "argument out of range or not a multiple of 8", DAG)
3983 : SDValue();
3984 case Intrinsic::loongarch_lsx_vstelm_d:
3985 return (!isShiftedInt<8, 3>(
3986 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3987 !isUInt<1>(Op.getConstantOperandVal(5)))
3989 Op, "argument out of range or not a multiple of 8", DAG)
3990 : SDValue();
3991 }
3992}
3993
3994SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
3995 SelectionDAG &DAG) const {
3996 SDLoc DL(Op);
3997 SDValue Lo = Op.getOperand(0);
3998 SDValue Hi = Op.getOperand(1);
3999 SDValue Shamt = Op.getOperand(2);
4000 EVT VT = Lo.getValueType();
4001
4002 // if Shamt-GRLen < 0: // Shamt < GRLen
4003 // Lo = Lo << Shamt
4004 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4005 // else:
4006 // Lo = 0
4007 // Hi = Lo << (Shamt-GRLen)
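// Worked example (illustrative, assuming GRLen == 32): for Shamt == 4,
// Hi = (Hi << 4) | (Lo >> 28), since (Lo >>u 1) >>u (31 ^ 4) == Lo >> 28;
// for Shamt == 40 the 'else' branch applies: Lo = 0 and Hi = (original Lo) << 8.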
4008
4009 SDValue Zero = DAG.getConstant(0, DL, VT);
4010 SDValue One = DAG.getConstant(1, DL, VT);
4011 SDValue MinusGRLen =
4012 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4013 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4014 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4015 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4016
4017 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4018 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4019 SDValue ShiftRightLo =
4020 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4021 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4022 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4023 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4024
4025 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4026
4027 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4028 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4029
4030 SDValue Parts[2] = {Lo, Hi};
4031 return DAG.getMergeValues(Parts, DL);
4032}
4033
4034SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4035 SelectionDAG &DAG,
4036 bool IsSRA) const {
4037 SDLoc DL(Op);
4038 SDValue Lo = Op.getOperand(0);
4039 SDValue Hi = Op.getOperand(1);
4040 SDValue Shamt = Op.getOperand(2);
4041 EVT VT = Lo.getValueType();
4042
4043 // SRA expansion:
4044 // if Shamt-GRLen < 0: // Shamt < GRLen
4045 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4046 // Hi = Hi >>s Shamt
4047 // else:
4048 // Lo = Hi >>s (Shamt-GRLen);
4049 // Hi = Hi >>s (GRLen-1)
4050 //
4051 // SRL expansion:
4052 // if Shamt-GRLen < 0: // Shamt < GRLen
4053 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4054 // Hi = Hi >>u Shamt
4055 // else:
4056 // Lo = Hi >>u (Shamt-GRLen);
4057 // Hi = 0;
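// Worked example (illustrative, assuming GRLen == 32, SRL): for Shamt == 4,
// Lo = (Lo >> 4) | (Hi << 28), since (Hi << 1) << (4 ^ 31) == Hi << 28;
// for Shamt == 40, Lo = Hi >>u 8 and Hi = 0 (Hi >>s 31 in the SRA case).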
4058
4059 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4060
4061 SDValue Zero = DAG.getConstant(0, DL, VT);
4062 SDValue One = DAG.getConstant(1, DL, VT);
4063 SDValue MinusGRLen =
4064 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4065 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4066 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4067 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4068
4069 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4070 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4071 SDValue ShiftLeftHi =
4072 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4073 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4074 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4075 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4076 SDValue HiFalse =
4077 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4078
4079 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4080
4081 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4082 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4083
4084 SDValue Parts[2] = {Lo, Hi};
4085 return DAG.getMergeValues(Parts, DL);
4086}
4087
4088// Returns the opcode of the target-specific SDNode that implements the 32-bit
4089// form of the given Opcode.
4090 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4091 switch (Opcode) {
4092 default:
4093 llvm_unreachable("Unexpected opcode");
4094 case ISD::SDIV:
4095 return LoongArchISD::DIV_W;
4096 case ISD::UDIV:
4097 return LoongArchISD::DIV_WU;
4098 case ISD::SREM:
4099 return LoongArchISD::MOD_W;
4100 case ISD::UREM:
4101 return LoongArchISD::MOD_WU;
4102 case ISD::SHL:
4103 return LoongArchISD::SLL_W;
4104 case ISD::SRA:
4105 return LoongArchISD::SRA_W;
4106 case ISD::SRL:
4107 return LoongArchISD::SRL_W;
4108 case ISD::ROTL:
4109 case ISD::ROTR:
4110 return LoongArchISD::ROTR_W;
4111 case ISD::CTTZ:
4112 return LoongArchISD::CTZ_W;
4113 case ISD::CTLZ:
4114 return LoongArchISD::CLZ_W;
4115 }
4116}
4117
4118// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4119// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4120 // otherwise be promoted to i64, making it difficult to select the
4121 // SLL_W/.../*W instructions later, because the fact that the operation was
4122 // originally of type i8/i16/i32 is lost.
4123 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4124 unsigned ExtOpc = ISD::ANY_EXTEND) {
4125 SDLoc DL(N);
4126 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4127 SDValue NewOp0, NewRes;
4128
4129 switch (NumOp) {
4130 default:
4131 llvm_unreachable("Unexpected NumOp");
4132 case 1: {
4133 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4134 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4135 break;
4136 }
4137 case 2: {
4138 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4139 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4140 if (N->getOpcode() == ISD::ROTL) {
4141 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4142 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4143 }
4144 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4145 break;
4146 }
4147 // TODO: Handle more NumOp values.
4148 }
4149
4150 // ReplaceNodeResults requires we maintain the same type for the return
4151 // value.
4152 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4153}
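// Illustrative example (not from the original source): on LA64, an i32
// (srl x, y) handled here becomes trunc(SRL_W(any_extend(x), any_extend(y))),
// so instruction selection can still pick the 32-bit srl.w form even though
// i32 is not a legal type.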
4154
4155 // Converts the given 32-bit operation to an i64 operation with sign-extension
4156 // semantics to reduce the number of sign-extension instructions.
4157 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4158 SDLoc DL(N);
4159 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4160 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4161 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4162 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4163 DAG.getValueType(MVT::i32));
4164 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4165}
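// Illustrative example (not from the original source): an i32 add on LA64 is
// rewritten as trunc(sext_inreg(add(any_extend(a), any_extend(b)), i32)),
// which can be selected as add.w and avoids a separate sign-extension of the
// result.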
4166
4167 // Helper function that emits an error message for intrinsics with or without a
4168 // chain and pushes an UNDEF (plus the chain, if present) as the results.
4169 static void emitErrorAndReplaceIntrinsicResults(
4170 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4171 StringRef ErrorMsg, bool WithChain = true) {
4172 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4173 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4174 if (!WithChain)
4175 return;
4176 Results.push_back(N->getOperand(0));
4177}
4178
4179template <unsigned N>
4180static void
4181 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4182 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4183 unsigned ResOp) {
4184 const StringRef ErrorMsgOOR = "argument out of range";
4185 unsigned Imm = Node->getConstantOperandVal(2);
4186 if (!isUInt<N>(Imm)) {
4187 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4188 /*WithChain=*/false);
4189 return;
4190 }
4191 SDLoc DL(Node);
4192 SDValue Vec = Node->getOperand(1);
4193
4194 SDValue PickElt =
4195 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4196 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4197 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4198 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4199 PickElt.getValue(0)));
4200}
4201
4204 SelectionDAG &DAG,
4205 const LoongArchSubtarget &Subtarget,
4206 unsigned ResOp) {
4207 SDLoc DL(N);
4208 SDValue Vec = N->getOperand(1);
4209
4210 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4211 Results.push_back(
4212 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4213}
4214
4215static void
4216 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4217 SelectionDAG &DAG,
4218 const LoongArchSubtarget &Subtarget) {
4219 switch (N->getConstantOperandVal(0)) {
4220 default:
4221 llvm_unreachable("Unexpected Intrinsic.");
4222 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4223 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4224 LoongArchISD::VPICK_SEXT_ELT);
4225 break;
4226 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4227 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4228 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4229 LoongArchISD::VPICK_SEXT_ELT);
4230 break;
4231 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4232 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4233 LoongArchISD::VPICK_SEXT_ELT);
4234 break;
4235 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4236 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4237 LoongArchISD::VPICK_ZEXT_ELT);
4238 break;
4239 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4240 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4241 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4242 LoongArchISD::VPICK_ZEXT_ELT);
4243 break;
4244 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4245 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4246 LoongArchISD::VPICK_ZEXT_ELT);
4247 break;
4248 case Intrinsic::loongarch_lsx_bz_b:
4249 case Intrinsic::loongarch_lsx_bz_h:
4250 case Intrinsic::loongarch_lsx_bz_w:
4251 case Intrinsic::loongarch_lsx_bz_d:
4252 case Intrinsic::loongarch_lasx_xbz_b:
4253 case Intrinsic::loongarch_lasx_xbz_h:
4254 case Intrinsic::loongarch_lasx_xbz_w:
4255 case Intrinsic::loongarch_lasx_xbz_d:
4256 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4258 break;
4259 case Intrinsic::loongarch_lsx_bz_v:
4260 case Intrinsic::loongarch_lasx_xbz_v:
4261 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4263 break;
4264 case Intrinsic::loongarch_lsx_bnz_b:
4265 case Intrinsic::loongarch_lsx_bnz_h:
4266 case Intrinsic::loongarch_lsx_bnz_w:
4267 case Intrinsic::loongarch_lsx_bnz_d:
4268 case Intrinsic::loongarch_lasx_xbnz_b:
4269 case Intrinsic::loongarch_lasx_xbnz_h:
4270 case Intrinsic::loongarch_lasx_xbnz_w:
4271 case Intrinsic::loongarch_lasx_xbnz_d:
4272 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4274 break;
4275 case Intrinsic::loongarch_lsx_bnz_v:
4276 case Intrinsic::loongarch_lasx_xbnz_v:
4277 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4279 break;
4280 }
4281}
4282
4283 static void replaceCMP_XCHG_128Results(SDNode *N,
4284 SmallVectorImpl<SDValue> &Results,
4285 SelectionDAG &DAG) {
4286 assert(N->getValueType(0) == MVT::i128 &&
4287 "AtomicCmpSwap on types less than 128 should be legal");
4288 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4289
4290 unsigned Opcode;
4291 switch (MemOp->getMergedOrdering()) {
4295 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4296 break;
4299 Opcode = LoongArch::PseudoCmpXchg128;
4300 break;
4301 default:
4302 llvm_unreachable("Unexpected ordering!");
4303 }
4304
4305 SDLoc DL(N);
4306 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4307 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4308 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4309 NewVal.first, NewVal.second, N->getOperand(0)};
4310
4311 SDNode *CmpSwap = DAG.getMachineNode(
4312 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4313 Ops);
4314 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4315 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4316 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4317 Results.push_back(SDValue(CmpSwap, 3));
4318}
4319
4320 void LoongArchTargetLowering::ReplaceNodeResults(
4321 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4322 SDLoc DL(N);
4323 EVT VT = N->getValueType(0);
4324 switch (N->getOpcode()) {
4325 default:
4326 llvm_unreachable("Don't know how to legalize this operation");
4327 case ISD::ADD:
4328 case ISD::SUB:
4329 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4330 "Unexpected custom legalisation");
4331 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4332 break;
4333 case ISD::SDIV:
4334 case ISD::UDIV:
4335 case ISD::SREM:
4336 case ISD::UREM:
4337 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4338 "Unexpected custom legalisation");
4339 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4340 Subtarget.hasDiv32() && VT == MVT::i32
4341 ? ISD::ANY_EXTEND
4342 : ISD::SIGN_EXTEND));
4343 break;
4344 case ISD::SHL:
4345 case ISD::SRA:
4346 case ISD::SRL:
4347 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4348 "Unexpected custom legalisation");
4349 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4350 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4351 break;
4352 }
4353 break;
4354 case ISD::ROTL:
4355 case ISD::ROTR:
4356 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4357 "Unexpected custom legalisation");
4358 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4359 break;
4360 case ISD::FP_TO_SINT: {
4361 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4362 "Unexpected custom legalisation");
4363 SDValue Src = N->getOperand(0);
4364 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4365 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4366 TargetLowering::TypeSoftenFloat) {
4367 if (!isTypeLegal(Src.getValueType()))
4368 return;
4369 if (Src.getValueType() == MVT::f16)
4370 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4371 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4372 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4373 return;
4374 }
4375 // If the FP type needs to be softened, emit a library call using the 'si'
4376 // version. If we left it to default legalization we'd end up with 'di'.
4377 RTLIB::Libcall LC;
4378 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4379 MakeLibCallOptions CallOptions;
4380 EVT OpVT = Src.getValueType();
4381 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4382 SDValue Chain = SDValue();
4383 SDValue Result;
4384 std::tie(Result, Chain) =
4385 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4386 Results.push_back(Result);
4387 break;
4388 }
4389 case ISD::BITCAST: {
4390 SDValue Src = N->getOperand(0);
4391 EVT SrcVT = Src.getValueType();
4392 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4393 Subtarget.hasBasicF()) {
4394 SDValue Dst =
4395 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4396 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4397 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4399 DAG.getVTList(MVT::i32, MVT::i32), Src);
4400 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4401 NewReg.getValue(0), NewReg.getValue(1));
4402 Results.push_back(RetReg);
4403 }
4404 break;
4405 }
4406 case ISD::FP_TO_UINT: {
4407 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4408 "Unexpected custom legalisation");
4409 auto &TLI = DAG.getTargetLoweringInfo();
4410 SDValue Tmp1, Tmp2;
4411 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4412 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4413 break;
4414 }
4415 case ISD::BSWAP: {
4416 SDValue Src = N->getOperand(0);
4417 assert((VT == MVT::i16 || VT == MVT::i32) &&
4418 "Unexpected custom legalization");
4419 MVT GRLenVT = Subtarget.getGRLenVT();
4420 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4421 SDValue Tmp;
4422 switch (VT.getSizeInBits()) {
4423 default:
4424 llvm_unreachable("Unexpected operand width");
4425 case 16:
4426 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4427 break;
4428 case 32:
4429 // Only LA64 will get to here due to the size mismatch between VT and
4430 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4431 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4432 break;
4433 }
4434 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4435 break;
4436 }
4437 case ISD::BITREVERSE: {
4438 SDValue Src = N->getOperand(0);
4439 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4440 "Unexpected custom legalization");
4441 MVT GRLenVT = Subtarget.getGRLenVT();
4442 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4443 SDValue Tmp;
4444 switch (VT.getSizeInBits()) {
4445 default:
4446 llvm_unreachable("Unexpected operand width");
4447 case 8:
4448 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4449 break;
4450 case 32:
4451 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4452 break;
4453 }
4454 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4455 break;
4456 }
4457 case ISD::CTLZ:
4458 case ISD::CTTZ: {
4459 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4460 "Unexpected custom legalisation");
4461 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4462 break;
4463 }
4464 case ISD::INTRINSIC_W_CHAIN: {
4465 SDValue Chain = N->getOperand(0);
4466 SDValue Op2 = N->getOperand(2);
4467 MVT GRLenVT = Subtarget.getGRLenVT();
4468 const StringRef ErrorMsgOOR = "argument out of range";
4469 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4470 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4471
4472 switch (N->getConstantOperandVal(1)) {
4473 default:
4474 llvm_unreachable("Unexpected Intrinsic.");
4475 case Intrinsic::loongarch_movfcsr2gr: {
4476 if (!Subtarget.hasBasicF()) {
4477 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4478 return;
4479 }
4480 unsigned Imm = Op2->getAsZExtVal();
4481 if (!isUInt<2>(Imm)) {
4482 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4483 return;
4484 }
4485 SDValue MOVFCSR2GRResults = DAG.getNode(
4486 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4487 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4488 Results.push_back(
4489 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4490 Results.push_back(MOVFCSR2GRResults.getValue(1));
4491 break;
4492 }
4493#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4494 case Intrinsic::loongarch_##NAME: { \
4495 SDValue NODE = DAG.getNode( \
4496 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4497 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4498 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4499 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4500 Results.push_back(NODE.getValue(1)); \
4501 break; \
4502 }
4503 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4504 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4505 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4506 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4507 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4508 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4509#undef CRC_CASE_EXT_BINARYOP
4510
4511#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4512 case Intrinsic::loongarch_##NAME: { \
4513 SDValue NODE = DAG.getNode( \
4514 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4515 {Chain, Op2, \
4516 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4517 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4518 Results.push_back(NODE.getValue(1)); \
4519 break; \
4520 }
4521 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4522 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4523#undef CRC_CASE_EXT_UNARYOP
4524#define CSR_CASE(ID) \
4525 case Intrinsic::loongarch_##ID: { \
4526 if (!Subtarget.is64Bit()) \
4527 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4528 break; \
4529 }
4530 CSR_CASE(csrrd_d);
4531 CSR_CASE(csrwr_d);
4532 CSR_CASE(csrxchg_d);
4533 CSR_CASE(iocsrrd_d);
4534#undef CSR_CASE
4535 case Intrinsic::loongarch_csrrd_w: {
4536 unsigned Imm = Op2->getAsZExtVal();
4537 if (!isUInt<14>(Imm)) {
4538 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4539 return;
4540 }
4541 SDValue CSRRDResults =
4542 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4543 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4544 Results.push_back(
4545 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4546 Results.push_back(CSRRDResults.getValue(1));
4547 break;
4548 }
4549 case Intrinsic::loongarch_csrwr_w: {
4550 unsigned Imm = N->getConstantOperandVal(3);
4551 if (!isUInt<14>(Imm)) {
4552 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4553 return;
4554 }
4555 SDValue CSRWRResults =
4556 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4557 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4558 DAG.getConstant(Imm, DL, GRLenVT)});
4559 Results.push_back(
4560 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4561 Results.push_back(CSRWRResults.getValue(1));
4562 break;
4563 }
4564 case Intrinsic::loongarch_csrxchg_w: {
4565 unsigned Imm = N->getConstantOperandVal(4);
4566 if (!isUInt<14>(Imm)) {
4567 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4568 return;
4569 }
4570 SDValue CSRXCHGResults = DAG.getNode(
4571 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4572 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4573 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4574 DAG.getConstant(Imm, DL, GRLenVT)});
4575 Results.push_back(
4576 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4577 Results.push_back(CSRXCHGResults.getValue(1));
4578 break;
4579 }
4580#define IOCSRRD_CASE(NAME, NODE) \
4581 case Intrinsic::loongarch_##NAME: { \
4582 SDValue IOCSRRDResults = \
4583 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4584 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4585 Results.push_back( \
4586 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4587 Results.push_back(IOCSRRDResults.getValue(1)); \
4588 break; \
4589 }
4590 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4591 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4592 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4593#undef IOCSRRD_CASE
4594 case Intrinsic::loongarch_cpucfg: {
4595 SDValue CPUCFGResults =
4596 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4597 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4598 Results.push_back(
4599 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4600 Results.push_back(CPUCFGResults.getValue(1));
4601 break;
4602 }
4603 case Intrinsic::loongarch_lddir_d: {
4604 if (!Subtarget.is64Bit()) {
4605 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4606 return;
4607 }
4608 break;
4609 }
4610 }
4611 break;
4612 }
4613 case ISD::READ_REGISTER: {
4614 if (Subtarget.is64Bit())
4615 DAG.getContext()->emitError(
4616 "On LA64, only 64-bit registers can be read.");
4617 else
4618 DAG.getContext()->emitError(
4619 "On LA32, only 32-bit registers can be read.");
4620 Results.push_back(DAG.getUNDEF(VT));
4621 Results.push_back(N->getOperand(0));
4622 break;
4623 }
4624 case ISD::INTRINSIC_WO_CHAIN: {
4625 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4626 break;
4627 }
4628 case ISD::LROUND: {
4629 SDValue Op0 = N->getOperand(0);
4630 EVT OpVT = Op0.getValueType();
4631 RTLIB::Libcall LC =
4632 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4633 MakeLibCallOptions CallOptions;
4634 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4635 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4636 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4637 Results.push_back(Result);
4638 break;
4639 }
4640 case ISD::ATOMIC_CMP_SWAP: {
4641 replaceCMP_XCHG_128Results(N, Results, DAG);
4642 break;
4643 }
4644 case ISD::TRUNCATE: {
4645 MVT VT = N->getSimpleValueType(0);
4646 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4647 return;
4648
4649 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4650 SDValue In = N->getOperand(0);
4651 EVT InVT = In.getValueType();
4652 EVT InEltVT = InVT.getVectorElementType();
4653 EVT EltVT = VT.getVectorElementType();
4654 unsigned MinElts = VT.getVectorNumElements();
4655 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4656 unsigned InBits = InVT.getSizeInBits();
4657
4658 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4659 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4660 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4661 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4662 for (unsigned I = 0; I < MinElts; ++I)
4663 TruncMask[I] = Scale * I;
4664
4665 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4666 MVT SVT = In.getSimpleValueType().getScalarType();
4667 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4668 SDValue WidenIn =
4669 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4670 DAG.getVectorIdxConstant(0, DL));
4671 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4672 "Illegal vector type in truncation");
4673 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4674 Results.push_back(
4675 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4676 return;
4677 }
4678 }
4679
4680 break;
4681 }
4682 }
4683}
4684
4685 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4686 TargetLowering::DAGCombinerInfo &DCI,
4687 const LoongArchSubtarget &Subtarget) {
4688 if (DCI.isBeforeLegalizeOps())
4689 return SDValue();
4690
4691 SDValue FirstOperand = N->getOperand(0);
4692 SDValue SecondOperand = N->getOperand(1);
4693 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4694 EVT ValTy = N->getValueType(0);
4695 SDLoc DL(N);
4696 uint64_t lsb, msb;
4697 unsigned SMIdx, SMLen;
4698 ConstantSDNode *CN;
4699 SDValue NewOperand;
4700 MVT GRLenVT = Subtarget.getGRLenVT();
4701
4702 // BSTRPICK requires the 32S feature.
4703 if (!Subtarget.has32S())
4704 return SDValue();
4705
4706 // Op's second operand must be a shifted mask.
4707 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4708 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4709 return SDValue();
4710
4711 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4712 // Pattern match BSTRPICK.
4713 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4714 // => BSTRPICK $dst, $src, msb, lsb
4715 // where msb = lsb + len - 1
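// For example (illustrative): $dst = and (srl $src, 4), 0xff has lsb = 4 and
// len = 8, and becomes BSTRPICK $dst, $src, 11, 4.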
4716
4717 // The second operand of the shift must be an immediate.
4718 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4719 return SDValue();
4720
4721 lsb = CN->getZExtValue();
4722
4723 // Return if the shifted mask does not start at bit 0 or the sum of its
4724 // length and lsb exceeds the word's size.
4725 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4726 return SDValue();
4727
4728 NewOperand = FirstOperand.getOperand(0);
4729 } else {
4730 // Pattern match BSTRPICK.
4731 // $dst = and $src, (2**len - 1), if len > 12
4732 // => BSTRPICK $dst, $src, msb, lsb
4733 // where lsb = 0 and msb = len - 1
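// For example (illustrative): $dst = and $src, 0xfffff has len = 20 (> 12)
// and becomes BSTRPICK $dst, $src, 19, 0.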
4734
4735 // If the mask is <= 0xfff, andi can be used instead.
4736 if (CN->getZExtValue() <= 0xfff)
4737 return SDValue();
4738
4739 // Return if the mask's MSB would exceed the value's bit width.
4740 if (SMIdx + SMLen > ValTy.getSizeInBits())
4741 return SDValue();
4742
4743 if (SMIdx > 0) {
4744 // Omit if the constant has more than 2 uses. This is a conservative
4745 // decision. Whether it is a win depends on the HW microarchitecture.
4746 // However it should always be better for 1 and 2 uses.
4747 if (CN->use_size() > 2)
4748 return SDValue();
4749 // Return if the constant can be composed by a single LU12I.W.
4750 if ((CN->getZExtValue() & 0xfff) == 0)
4751 return SDValue();
4752 // Return if the constant can be composed by a single ADDI with
4753 // the zero register.
4754 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4755 return SDValue();
4756 }
4757
4758 lsb = SMIdx;
4759 NewOperand = FirstOperand;
4760 }
4761
4762 msb = lsb + SMLen - 1;
4763 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4764 DAG.getConstant(msb, DL, GRLenVT),
4765 DAG.getConstant(lsb, DL, GRLenVT));
4766 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4767 return NR0;
4768 // Try to optimize to
4769 // bstrpick $Rd, $Rs, msb, lsb
4770 // slli $Rd, $Rd, lsb
4771 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4772 DAG.getConstant(lsb, DL, GRLenVT));
4773}
4774
4775 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4776 TargetLowering::DAGCombinerInfo &DCI,
4777 const LoongArchSubtarget &Subtarget) {
4778 // BSTRPICK requires the 32S feature.
4779 if (!Subtarget.has32S())
4780 return SDValue();
4781
4782 if (DCI.isBeforeLegalizeOps())
4783 return SDValue();
4784
4785 // $dst = srl (and $src, Mask), Shamt
4786 // =>
4787 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4788 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4789 //
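// For example (illustrative): $dst = srl (and $src, 0xff0), 4 has
// MaskIdx = 4, MaskLen = 8 and Shamt = 4, and becomes
// BSTRPICK $dst, $src, 11, 4.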
4790
4791 SDValue FirstOperand = N->getOperand(0);
4792 ConstantSDNode *CN;
4793 EVT ValTy = N->getValueType(0);
4794 SDLoc DL(N);
4795 MVT GRLenVT = Subtarget.getGRLenVT();
4796 unsigned MaskIdx, MaskLen;
4797 uint64_t Shamt;
4798
4799 // The first operand must be an AND and the second operand of the AND must be
4800 // a shifted mask.
4801 if (FirstOperand.getOpcode() != ISD::AND ||
4802 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4803 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4804 return SDValue();
4805
4806 // The second operand (shift amount) must be an immediate.
4807 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4808 return SDValue();
4809
4810 Shamt = CN->getZExtValue();
4811 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4812 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4813 FirstOperand->getOperand(0),
4814 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4815 DAG.getConstant(Shamt, DL, GRLenVT));
4816
4817 return SDValue();
4818}
4819
4820// Helper to peek through bitops/trunc/setcc to determine size of source vector.
4821// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
4822static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4823 unsigned Depth) {
4824 // Limit recursion.
4825 if (Depth >= SelectionDAG::MaxRecursionDepth)
4826 return false;
4827 switch (Src.getOpcode()) {
4828 case ISD::SETCC:
4829 case ISD::TRUNCATE:
4830 return Src.getOperand(0).getValueSizeInBits() == Size;
4831 case ISD::FREEZE:
4832 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4833 case ISD::AND:
4834 case ISD::XOR:
4835 case ISD::OR:
4836 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4837 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4838 case ISD::SELECT:
4839 case ISD::VSELECT:
4840 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4841 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4842 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4843 case ISD::BUILD_VECTOR:
4844 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4845 ISD::isBuildVectorAllOnes(Src.getNode());
4846 }
4847 return false;
4848}
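// Illustrative note (not from the original source): a v8i1 produced by a
// setcc on two v8i32 operands reports a 256-bit source here, letting the
// bitcast combine below choose the 256-bit (LASX) mask path.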
4849
4850// Helper to push sign extension of vXi1 SETCC result through bitops.
4851 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4852 SDValue Src, const SDLoc &DL) {
4853 switch (Src.getOpcode()) {
4854 case ISD::SETCC:
4855 case ISD::FREEZE:
4856 case ISD::TRUNCATE:
4857 case ISD::BUILD_VECTOR:
4858 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4859 case ISD::AND:
4860 case ISD::XOR:
4861 case ISD::OR:
4862 return DAG.getNode(
4863 Src.getOpcode(), DL, SExtVT,
4864 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4865 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4866 case ISD::SELECT:
4867 case ISD::VSELECT:
4868 return DAG.getSelect(
4869 DL, SExtVT, Src.getOperand(0),
4870 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4871 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4872 }
4873 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4874}
4875
4876static SDValue
4877 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4878 TargetLowering::DAGCombinerInfo &DCI,
4879 const LoongArchSubtarget &Subtarget) {
4880 SDLoc DL(N);
4881 EVT VT = N->getValueType(0);
4882 SDValue Src = N->getOperand(0);
4883 EVT SrcVT = Src.getValueType();
4884
4885 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4886 return SDValue();
4887
4888 bool UseLASX;
4889 unsigned Opc = ISD::DELETED_NODE;
4890 EVT CmpVT = Src.getOperand(0).getValueType();
4891 EVT EltVT = CmpVT.getVectorElementType();
4892
4893 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4894 UseLASX = false;
4895 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4896 CmpVT.getSizeInBits() == 256)
4897 UseLASX = true;
4898 else
4899 return SDValue();
4900
4901 SDValue SrcN1 = Src.getOperand(1);
4902 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
4903 default:
4904 break;
4905 case ISD::SETEQ:
4906 // x == 0 => not (vmsknez.b x)
4907 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4909 break;
4910 case ISD::SETGT:
4911 // x > -1 => vmskgez.b x
4912 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
4914 break;
4915 case ISD::SETGE:
4916 // x >= 0 => vmskgez.b x
4917 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4919 break;
4920 case ISD::SETLT:
4921 // x < 0 => vmskltz.{b,h,w,d} x
4922 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
4923 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4924 EltVT == MVT::i64))
4926 break;
4927 case ISD::SETLE:
4928 // x <= -1 => vmskltz.{b,h,w,d} x
4929 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
4930 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4931 EltVT == MVT::i64))
4933 break;
4934 case ISD::SETNE:
4935 // x != 0 => vmsknez.b x
4936 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4938 break;
4939 }
4940
4941 if (Opc == ISD::DELETED_NODE)
4942 return SDValue();
4943
4944 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
4945 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4946 V = DAG.getZExtOrTrunc(V, DL, T);
4947 return DAG.getBitcast(VT, V);
4948}
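// Illustrative note (not from the original source): with LSX, bitcasting the
// v16i1 result of (setlt v16i8 x, zero) to i16 is folded here into a single
// VMSKLTZ node (vmskltz.b), whose low 16 bits form the i16 mask.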
4949
4950 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
4951 TargetLowering::DAGCombinerInfo &DCI,
4952 const LoongArchSubtarget &Subtarget) {
4953 SDLoc DL(N);
4954 EVT VT = N->getValueType(0);
4955 SDValue Src = N->getOperand(0);
4956 EVT SrcVT = Src.getValueType();
4957
4958 if (!DCI.isBeforeLegalizeOps())
4959 return SDValue();
4960
4961 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
4962 return SDValue();
4963
4964 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
4965 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
4966 if (Res)
4967 return Res;
4968
4969 // Generate vXi1 using [X]VMSKLTZ
4970 MVT SExtVT;
4971 unsigned Opc;
4972 bool UseLASX = false;
4973 bool PropagateSExt = false;
4974
4975 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
4976 EVT CmpVT = Src.getOperand(0).getValueType();
4977 if (CmpVT.getSizeInBits() > 256)
4978 return SDValue();
4979 }
4980
4981 switch (SrcVT.getSimpleVT().SimpleTy) {
4982 default:
4983 return SDValue();
4984 case MVT::v2i1:
4985 SExtVT = MVT::v2i64;
4986 break;
4987 case MVT::v4i1:
4988 SExtVT = MVT::v4i32;
4989 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4990 SExtVT = MVT::v4i64;
4991 UseLASX = true;
4992 PropagateSExt = true;
4993 }
4994 break;
4995 case MVT::v8i1:
4996 SExtVT = MVT::v8i16;
4997 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4998 SExtVT = MVT::v8i32;
4999 UseLASX = true;
5000 PropagateSExt = true;
5001 }
5002 break;
5003 case MVT::v16i1:
5004 SExtVT = MVT::v16i8;
5005 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5006 SExtVT = MVT::v16i16;
5007 UseLASX = true;
5008 PropagateSExt = true;
5009 }
5010 break;
5011 case MVT::v32i1:
5012 SExtVT = MVT::v32i8;
5013 UseLASX = true;
5014 break;
5015 };
5016 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5017 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5018
5019 SDValue V;
5020 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5021 if (Src.getSimpleValueType() == MVT::v32i8) {
5022 SDValue Lo, Hi;
5023 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5024 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
5025 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
5026 Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
5027 DAG.getConstant(16, DL, MVT::i8));
5028 V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
5029 } else if (UseLASX) {
5030 return SDValue();
5031 }
5032 }
5033
5034 if (!V) {
5035 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5036 V = DAG.getNode(Opc, DL, MVT::i64, Src);
5037 }
5038
5039 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5040 V = DAG.getZExtOrTrunc(V, DL, T);
5041 return DAG.getBitcast(VT, V);
5042}
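// Illustrative summary: a vXi1 source that is not itself a compare is first
// sign-extended so that each lane's sign bit holds the original i1 value and
// is then converted to a scalar mask with [X]VMSKLTZ, e.g.
//   v16i1 -> sign_extend -> v16i8 -> VMSKLTZ -> i64 -> trunc -> i16
// On LSX-only subtargets a v32i8 source is split into two 128-bit halves,
// each half is masked with VMSKLTZ, and the high mask is shifted left by 16
// and OR'ed into the low mask.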
5043
5044static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5045 TargetLowering::DAGCombinerInfo &DCI,
5046 const LoongArchSubtarget &Subtarget) {
5047 MVT GRLenVT = Subtarget.getGRLenVT();
5048 EVT ValTy = N->getValueType(0);
5049 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5050 ConstantSDNode *CN0, *CN1;
5051 SDLoc DL(N);
5052 unsigned ValBits = ValTy.getSizeInBits();
5053 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5054 unsigned Shamt;
5055 bool SwapAndRetried = false;
5056
5057 // BSTRPICK requires the 32S feature.
5058 if (!Subtarget.has32S())
5059 return SDValue();
5060
5061 if (DCI.isBeforeLegalizeOps())
5062 return SDValue();
5063
5064 if (ValBits != 32 && ValBits != 64)
5065 return SDValue();
5066
5067Retry:
5068 // 1st pattern to match BSTRINS:
5069 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5070 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5071 // =>
5072 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5073 if (N0.getOpcode() == ISD::AND &&
5074 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5075 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5076 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5077 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5078 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5079 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5080 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5081 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5082 (MaskIdx0 + MaskLen0 <= ValBits)) {
5083 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5084 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5085 N1.getOperand(0).getOperand(0),
5086 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5087 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5088 }
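// Illustrative example of pattern 1: with i32 operands, size = 8 and lsb = 8,
//   mask1 = 0x0000ff00, mask0 = ~mask1 = 0xffff00ff
//   R = or (and X, 0xffff00ff), (and (shl Y, 8), 0x0000ff00)
// becomes
//   R = BSTRINS X, Y, 15, 8      // msb = lsb + size - 1 = 15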
5089
5090 // 2nd pattern to match BSTRINS:
5091 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5092 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5093 // =>
5094 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5095 if (N0.getOpcode() == ISD::AND &&
5096 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5097 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5098 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5099 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5100 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5101 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5102 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5103 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5104 (MaskIdx0 + MaskLen0 <= ValBits)) {
5105 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5106 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5107 N1.getOperand(0).getOperand(0),
5108 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5109 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5110 }
5111
5112 // 3rd pattern to match BSTRINS:
5113 // R = or (and X, mask0), (and Y, mask1)
5114 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5115 // =>
5116 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5117 // where msb = lsb + size - 1
5118 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5119 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5120 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5121 (MaskIdx0 + MaskLen0 <= 64) &&
5122 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5123 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5124 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5125 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5126 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5127 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5128 DAG.getConstant(ValBits == 32
5129 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5130 : (MaskIdx0 + MaskLen0 - 1),
5131 DL, GRLenVT),
5132 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5133 }
5134
5135 // 4th pattern to match BSTRINS:
5136 // R = or (and X, mask), (shl Y, shamt)
5137 // where mask = (2**shamt - 1)
5138 // =>
5139 // R = BSTRINS X, Y, ValBits - 1, shamt
5140 // where ValBits = 32 or 64
5141 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5142 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5143 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5144 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5145 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5146 (MaskIdx0 + MaskLen0 <= ValBits)) {
5147 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5148 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5149 N1.getOperand(0),
5150 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5151 DAG.getConstant(Shamt, DL, GRLenVT));
5152 }
5153
5154 // 5th pattern to match BSTRINS:
5155 // R = or (and X, mask), const
5156 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5157 // =>
5158 // R = BSTRINS X, (const >> lsb), msb, lsb
5159 // where msb = lsb + size - 1
5160 if (N0.getOpcode() == ISD::AND &&
5161 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5162 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5163 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5164 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5165 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5166 return DAG.getNode(
5167 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5168 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5169 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5170 : (MaskIdx0 + MaskLen0 - 1),
5171 DL, GRLenVT),
5172 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5173 }
5174
5175 // 6th pattern.
5176 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5177 // by the incoming bits are known to be zero.
5178 // =>
5179 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5180 //
5181 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5182 // pattern is more common than the 1st. So we put the 1st before the 6th in
5183 // order to match as many nodes as possible.
5184 ConstantSDNode *CNMask, *CNShamt;
5185 unsigned MaskIdx, MaskLen;
5186 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5187 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5188 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5189 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5190 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5191 Shamt = CNShamt->getZExtValue();
5192 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5193 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5194 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5195 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5196 N1.getOperand(0).getOperand(0),
5197 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5198 DAG.getConstant(Shamt, DL, GRLenVT));
5199 }
5200 }
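// Illustrative example of pattern 6: if b is known to have zeros in bits
// [11:4], then with mask = 0xff (MaskLen = 8) and shamt = 4,
//   a = or b, (shl (and c, 0xff), 4)
// becomes
//   a = BSTRINS b, c, 11, 4      // shamt + MaskLen - 1 = 11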
5201
5202 // 7th pattern.
5203 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5204 // overwritten by the incoming bits are known to be zero.
5205 // =>
5206 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5207 //
5208 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5209 // before the 7th in order to match as many nodes as possible.
5210 if (N1.getOpcode() == ISD::AND &&
5211 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5212 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5213 N1.getOperand(0).getOpcode() == ISD::SHL &&
5214 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5215 CNShamt->getZExtValue() == MaskIdx) {
5216 APInt ShMask(ValBits, CNMask->getZExtValue());
5217 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5218 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5219 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5220 N1.getOperand(0).getOperand(0),
5221 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5222 DAG.getConstant(MaskIdx, DL, GRLenVT));
5223 }
5224 }
5225
5226 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5227 if (!SwapAndRetried) {
5228 std::swap(N0, N1);
5229 SwapAndRetried = true;
5230 goto Retry;
5231 }
5232
5233 SwapAndRetried = false;
5234Retry2:
5235 // 8th pattern.
5236 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5237 // the incoming bits are known to be zero.
5238 // =>
5239 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5240 //
5241 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5242 // we put it here in order to match as many nodes as possible or generate
5243 // fewer instructions.
5244 if (N1.getOpcode() == ISD::AND &&
5245 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5246 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5247 APInt ShMask(ValBits, CNMask->getZExtValue());
5248 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5249 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5250 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5251 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5252 N1->getOperand(0),
5253 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5254 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5255 DAG.getConstant(MaskIdx, DL, GRLenVT));
5256 }
5257 }
5258 // Swap N0/N1 and retry.
5259 if (!SwapAndRetried) {
5260 std::swap(N0, N1);
5261 SwapAndRetried = true;
5262 goto Retry2;
5263 }
5264
5265 return SDValue();
5266}
5267
5268static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5269 ExtType = ISD::NON_EXTLOAD;
5270
5271 switch (V.getNode()->getOpcode()) {
5272 case ISD::LOAD: {
5273 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5274 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5275 (LoadNode->getMemoryVT() == MVT::i16)) {
5276 ExtType = LoadNode->getExtensionType();
5277 return true;
5278 }
5279 return false;
5280 }
5281 case ISD::AssertSext: {
5282 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5283 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5284 ExtType = ISD::SEXTLOAD;
5285 return true;
5286 }
5287 return false;
5288 }
5289 case ISD::AssertZext: {
5290 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5291 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5292 ExtType = ISD::ZEXTLOAD;
5293 return true;
5294 }
5295 return false;
5296 }
5297 default:
5298 return false;
5299 }
5300
5301 return false;
5302}
5303
5304// Eliminate redundant truncation and zero-extension nodes.
5305// * Case 1:
5306// +------------+ +------------+ +------------+
5307// | Input1 | | Input2 | | CC |
5308// +------------+ +------------+ +------------+
5309// | | |
5310// V V +----+
5311// +------------+ +------------+ |
5312// | TRUNCATE | | TRUNCATE | |
5313// +------------+ +------------+ |
5314// | | |
5315// V V |
5316// +------------+ +------------+ |
5317// | ZERO_EXT | | ZERO_EXT | |
5318// +------------+ +------------+ |
5319// | | |
5320// | +-------------+ |
5321// V V | |
5322// +----------------+ | |
5323// | AND | | |
5324// +----------------+ | |
5325// | | |
5326// +---------------+ | |
5327// | | |
5328// V V V
5329// +-------------+
5330// | CMP |
5331// +-------------+
5332// * Case 2:
5333// +------------+ +------------+ +-------------+ +------------+ +------------+
5334// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5335// +------------+ +------------+ +-------------+ +------------+ +------------+
5336// | | | | |
5337// V | | | |
5338// +------------+ | | | |
5339// | XOR |<---------------------+ | |
5340// +------------+ | | |
5341// | | | |
5342// V V +---------------+ |
5343// +------------+ +------------+ | |
5344// | TRUNCATE | | TRUNCATE | | +-------------------------+
5345// +------------+ +------------+ | |
5346// | | | |
5347// V V | |
5348// +------------+ +------------+ | |
5349// | ZERO_EXT | | ZERO_EXT | | |
5350// +------------+ +------------+ | |
5351// | | | |
5352// V V | |
5353// +----------------+ | |
5354// | AND | | |
5355// +----------------+ | |
5356// | | |
5357// +---------------+ | |
5358// | | |
5359// V V V
5360// +-------------+
5361// | CMP |
5362// +-------------+
5363static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5364 TargetLowering::DAGCombinerInfo &DCI,
5365 const LoongArchSubtarget &Subtarget) {
5366 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5367
5368 SDNode *AndNode = N->getOperand(0).getNode();
5369 if (AndNode->getOpcode() != ISD::AND)
5370 return SDValue();
5371
5372 SDValue AndInputValue2 = AndNode->getOperand(1);
5373 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5374 return SDValue();
5375
5376 SDValue CmpInputValue = N->getOperand(1);
5377 SDValue AndInputValue1 = AndNode->getOperand(0);
5378 if (AndInputValue1.getOpcode() == ISD::XOR) {
5379 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5380 return SDValue();
5381 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5382 if (!CN || CN->getSExtValue() != -1)
5383 return SDValue();
5384 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5385 if (!CN || CN->getSExtValue() != 0)
5386 return SDValue();
5387 AndInputValue1 = AndInputValue1.getOperand(0);
5388 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5389 return SDValue();
5390 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5391 if (AndInputValue2 != CmpInputValue)
5392 return SDValue();
5393 } else {
5394 return SDValue();
5395 }
5396
5397 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5398 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5399 return SDValue();
5400
5401 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5402 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5403 return SDValue();
5404
5405 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5406 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5407 ISD::LoadExtType ExtType1;
5408 ISD::LoadExtType ExtType2;
5409
5410 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5411 !checkValueWidth(TruncInputValue2, ExtType2))
5412 return SDValue();
5413
5414 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5415 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5416 return SDValue();
5417
5418 if ((ExtType2 != ISD::ZEXTLOAD) &&
5419 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5420 return SDValue();
5421
5422 // These truncation and zero-extension nodes are unnecessary; remove them.
5423 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5424 TruncInputValue1, TruncInputValue2);
5425 SDValue NewSetCC =
5426 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5427 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5428 return SDValue(N, 0);
5429}
5430
5431// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5432static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5433 TargetLowering::DAGCombinerInfo &DCI,
5434 const LoongArchSubtarget &Subtarget) {
5435 if (DCI.isBeforeLegalizeOps())
5436 return SDValue();
5437
5438 SDValue Src = N->getOperand(0);
5439 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5440 return SDValue();
5441
5442 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5443 Src.getOperand(0));
5444}
5445
5446// Perform common combines for BR_CC and SELECT_CC conditions.
5447static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5448 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5449 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5450
5451 // Since an arithmetic right shift always preserves the sign bit,
5452 // the shift can be omitted here.
5453 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5454 // setge (sra X, N), 0 -> setge X, 0
5455 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5456 LHS.getOpcode() == ISD::SRA) {
5457 LHS = LHS.getOperand(0);
5458 return true;
5459 }
5460
5461 if (!ISD::isIntEqualitySetCC(CCVal))
5462 return false;
5463
5464 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5465 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5466 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5467 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5468 // If we're looking for eq 0 instead of ne 0, we need to invert the
5469 // condition.
5470 bool Invert = CCVal == ISD::SETEQ;
5471 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5472 if (Invert)
5473 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5474
5475 RHS = LHS.getOperand(1);
5476 LHS = LHS.getOperand(0);
5477 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5478
5479 CC = DAG.getCondCode(CCVal);
5480 return true;
5481 }
5482
5483 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
5484 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5485 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5486 SDValue LHS0 = LHS.getOperand(0);
5487 if (LHS0.getOpcode() == ISD::AND &&
5488 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5489 uint64_t Mask = LHS0.getConstantOperandVal(1);
5490 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5491 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5492 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5493 CC = DAG.getCondCode(CCVal);
5494
5495 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5496 LHS = LHS0.getOperand(0);
5497 if (ShAmt != 0)
5498 LHS =
5499 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5500 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5501 return true;
5502 }
5503 }
5504 }
5505
5506 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5507 // This can occur when legalizing some floating point comparisons.
5508 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5509 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5510 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5511 CC = DAG.getCondCode(CCVal);
5512 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5513 return true;
5514 }
5515
5516 return false;
5517}
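// Illustrative example of the single-bit fold above: on LA64 (GRLen = 64),
// testing bit 5 of X,
//   ((srl (and X, 0x20), 5), 0, ne)
// becomes
//   ((shl X, 58), 0, lt)         // 58 = GRLen - 1 - 5
// The tested bit is moved into the sign position, so the compare against
// zero turns into a plain sign check.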
5518
5519static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5520 TargetLowering::DAGCombinerInfo &DCI,
5521 const LoongArchSubtarget &Subtarget) {
5522 SDValue LHS = N->getOperand(1);
5523 SDValue RHS = N->getOperand(2);
5524 SDValue CC = N->getOperand(3);
5525 SDLoc DL(N);
5526
5527 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5528 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5529 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5530
5531 return SDValue();
5532}
5533
5534static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5535 TargetLowering::DAGCombinerInfo &DCI,
5536 const LoongArchSubtarget &Subtarget) {
5537 // Transform SELECT_CC into cheaper operations when possible.
5538 SDValue LHS = N->getOperand(0);
5539 SDValue RHS = N->getOperand(1);
5540 SDValue CC = N->getOperand(2);
5541 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5542 SDValue TrueV = N->getOperand(3);
5543 SDValue FalseV = N->getOperand(4);
5544 SDLoc DL(N);
5545 EVT VT = N->getValueType(0);
5546
5547 // If the True and False values are the same, we don't need a select_cc.
5548 if (TrueV == FalseV)
5549 return TrueV;
5550
5551 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5552 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
5553 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5554 isNullConstant(RHS) &&
5555 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5556 if (CCVal == ISD::CondCode::SETGE)
5557 std::swap(TrueV, FalseV);
5558
5559 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5560 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5561 // Only handle simm12; if the value is outside this range, it is better
5562 // treated as a register operand.
5563 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5564 isInt<12>(TrueSImm - FalseSImm)) {
5565 SDValue SRA =
5566 DAG.getNode(ISD::SRA, DL, VT, LHS,
5567 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5568 SDValue AND =
5569 DAG.getNode(ISD::AND, DL, VT, SRA,
5570 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5571 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5572 }
5573
5574 if (CCVal == ISD::CondCode::SETGE)
5575 std::swap(TrueV, FalseV);
5576 }
5577
5578 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5579 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5580 {LHS, RHS, CC, TrueV, FalseV});
5581
5582 return SDValue();
5583}
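// Illustrative example of the branchless lowering above: for
// (select (x < 0), 7, 3) on LA64,
//   sra = x >> 63                // all ones if x < 0, otherwise zero
//   and = sra & (7 - 3)          // 4 if x < 0, otherwise 0
//   res = and + 3                // 7 if x < 0, otherwise 3
// Both constants and their difference fit in simm12, so no branch is needed.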
5584
5585template <unsigned N>
5586static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5587 SelectionDAG &DAG,
5588 const LoongArchSubtarget &Subtarget,
5589 bool IsSigned = false) {
5590 SDLoc DL(Node);
5591 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5592 // Check the ImmArg.
5593 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5594 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5595 DAG.getContext()->emitError(Node->getOperationName(0) +
5596 ": argument out of range.");
5597 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5598 }
5599 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5600}
5601
5602template <unsigned N>
5603static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5604 SelectionDAG &DAG, bool IsSigned = false) {
5605 SDLoc DL(Node);
5606 EVT ResTy = Node->getValueType(0);
5607 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5608
5609 // Check the ImmArg.
5610 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5611 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5612 DAG.getContext()->emitError(Node->getOperationName(0) +
5613 ": argument out of range.");
5614 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5615 }
5616 return DAG.getConstant(
5617 APInt(ResTy.getScalarType().getSizeInBits(),
5618 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5619 DL, ResTy);
5620}
5621
5622static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5623 SDLoc DL(Node);
5624 EVT ResTy = Node->getValueType(0);
5625 SDValue Vec = Node->getOperand(2);
5626 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5627 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5628}
5629
5630static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5631 SDLoc DL(Node);
5632 EVT ResTy = Node->getValueType(0);
5633 SDValue One = DAG.getConstant(1, DL, ResTy);
5634 SDValue Bit =
5635 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5636
5637 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5638 DAG.getNOT(DL, Bit, ResTy));
5639}
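// Illustrative summary: for vbitclr.w the helpers above expand the intrinsic
// per lane as
//   res[i] = a[i] & ~(1 << (b[i] & 31))
// truncateVecElts masks each shift amount to the element width, and
// lowerVectorBitClear then clears the selected bit by ANDing with the
// inverted one-hot value.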
5640
5641template <unsigned N>
5642static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5643 SDLoc DL(Node);
5644 EVT ResTy = Node->getValueType(0);
5645 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5646 // Check the unsigned ImmArg.
5647 if (!isUInt<N>(CImm->getZExtValue())) {
5648 DAG.getContext()->emitError(Node->getOperationName(0) +
5649 ": argument out of range.");
5650 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5651 }
5652
5653 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5654 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5655
5656 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5657}
5658
5659template <unsigned N>
5660static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5661 SDLoc DL(Node);
5662 EVT ResTy = Node->getValueType(0);
5663 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5664 // Check the unsigned ImmArg.
5665 if (!isUInt<N>(CImm->getZExtValue())) {
5666 DAG.getContext()->emitError(Node->getOperationName(0) +
5667 ": argument out of range.");
5668 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5669 }
5670
5671 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5672 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5673 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5674}
5675
5676template <unsigned N>
5677static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5678 SDLoc DL(Node);
5679 EVT ResTy = Node->getValueType(0);
5680 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5681 // Check the unsigned ImmArg.
5682 if (!isUInt<N>(CImm->getZExtValue())) {
5683 DAG.getContext()->emitError(Node->getOperationName(0) +
5684 ": argument out of range.");
5685 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5686 }
5687
5688 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5689 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5690 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5691}
5692
5693static SDValue
5694performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5695 TargetLowering::DAGCombinerInfo &DCI,
5696 const LoongArchSubtarget &Subtarget) {
5697 SDLoc DL(N);
5698 switch (N->getConstantOperandVal(0)) {
5699 default:
5700 break;
5701 case Intrinsic::loongarch_lsx_vadd_b:
5702 case Intrinsic::loongarch_lsx_vadd_h:
5703 case Intrinsic::loongarch_lsx_vadd_w:
5704 case Intrinsic::loongarch_lsx_vadd_d:
5705 case Intrinsic::loongarch_lasx_xvadd_b:
5706 case Intrinsic::loongarch_lasx_xvadd_h:
5707 case Intrinsic::loongarch_lasx_xvadd_w:
5708 case Intrinsic::loongarch_lasx_xvadd_d:
5709 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5710 N->getOperand(2));
5711 case Intrinsic::loongarch_lsx_vaddi_bu:
5712 case Intrinsic::loongarch_lsx_vaddi_hu:
5713 case Intrinsic::loongarch_lsx_vaddi_wu:
5714 case Intrinsic::loongarch_lsx_vaddi_du:
5715 case Intrinsic::loongarch_lasx_xvaddi_bu:
5716 case Intrinsic::loongarch_lasx_xvaddi_hu:
5717 case Intrinsic::loongarch_lasx_xvaddi_wu:
5718 case Intrinsic::loongarch_lasx_xvaddi_du:
5719 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5720 lowerVectorSplatImm<5>(N, 2, DAG));
5721 case Intrinsic::loongarch_lsx_vsub_b:
5722 case Intrinsic::loongarch_lsx_vsub_h:
5723 case Intrinsic::loongarch_lsx_vsub_w:
5724 case Intrinsic::loongarch_lsx_vsub_d:
5725 case Intrinsic::loongarch_lasx_xvsub_b:
5726 case Intrinsic::loongarch_lasx_xvsub_h:
5727 case Intrinsic::loongarch_lasx_xvsub_w:
5728 case Intrinsic::loongarch_lasx_xvsub_d:
5729 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5730 N->getOperand(2));
5731 case Intrinsic::loongarch_lsx_vsubi_bu:
5732 case Intrinsic::loongarch_lsx_vsubi_hu:
5733 case Intrinsic::loongarch_lsx_vsubi_wu:
5734 case Intrinsic::loongarch_lsx_vsubi_du:
5735 case Intrinsic::loongarch_lasx_xvsubi_bu:
5736 case Intrinsic::loongarch_lasx_xvsubi_hu:
5737 case Intrinsic::loongarch_lasx_xvsubi_wu:
5738 case Intrinsic::loongarch_lasx_xvsubi_du:
5739 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5740 lowerVectorSplatImm<5>(N, 2, DAG));
5741 case Intrinsic::loongarch_lsx_vneg_b:
5742 case Intrinsic::loongarch_lsx_vneg_h:
5743 case Intrinsic::loongarch_lsx_vneg_w:
5744 case Intrinsic::loongarch_lsx_vneg_d:
5745 case Intrinsic::loongarch_lasx_xvneg_b:
5746 case Intrinsic::loongarch_lasx_xvneg_h:
5747 case Intrinsic::loongarch_lasx_xvneg_w:
5748 case Intrinsic::loongarch_lasx_xvneg_d:
5749 return DAG.getNode(
5750 ISD::SUB, DL, N->getValueType(0),
5751 DAG.getConstant(
5752 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5753 /*isSigned=*/true),
5754 SDLoc(N), N->getValueType(0)),
5755 N->getOperand(1));
5756 case Intrinsic::loongarch_lsx_vmax_b:
5757 case Intrinsic::loongarch_lsx_vmax_h:
5758 case Intrinsic::loongarch_lsx_vmax_w:
5759 case Intrinsic::loongarch_lsx_vmax_d:
5760 case Intrinsic::loongarch_lasx_xvmax_b:
5761 case Intrinsic::loongarch_lasx_xvmax_h:
5762 case Intrinsic::loongarch_lasx_xvmax_w:
5763 case Intrinsic::loongarch_lasx_xvmax_d:
5764 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5765 N->getOperand(2));
5766 case Intrinsic::loongarch_lsx_vmax_bu:
5767 case Intrinsic::loongarch_lsx_vmax_hu:
5768 case Intrinsic::loongarch_lsx_vmax_wu:
5769 case Intrinsic::loongarch_lsx_vmax_du:
5770 case Intrinsic::loongarch_lasx_xvmax_bu:
5771 case Intrinsic::loongarch_lasx_xvmax_hu:
5772 case Intrinsic::loongarch_lasx_xvmax_wu:
5773 case Intrinsic::loongarch_lasx_xvmax_du:
5774 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5775 N->getOperand(2));
5776 case Intrinsic::loongarch_lsx_vmaxi_b:
5777 case Intrinsic::loongarch_lsx_vmaxi_h:
5778 case Intrinsic::loongarch_lsx_vmaxi_w:
5779 case Intrinsic::loongarch_lsx_vmaxi_d:
5780 case Intrinsic::loongarch_lasx_xvmaxi_b:
5781 case Intrinsic::loongarch_lasx_xvmaxi_h:
5782 case Intrinsic::loongarch_lasx_xvmaxi_w:
5783 case Intrinsic::loongarch_lasx_xvmaxi_d:
5784 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5785 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5786 case Intrinsic::loongarch_lsx_vmaxi_bu:
5787 case Intrinsic::loongarch_lsx_vmaxi_hu:
5788 case Intrinsic::loongarch_lsx_vmaxi_wu:
5789 case Intrinsic::loongarch_lsx_vmaxi_du:
5790 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5791 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5792 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5793 case Intrinsic::loongarch_lasx_xvmaxi_du:
5794 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5795 lowerVectorSplatImm<5>(N, 2, DAG));
5796 case Intrinsic::loongarch_lsx_vmin_b:
5797 case Intrinsic::loongarch_lsx_vmin_h:
5798 case Intrinsic::loongarch_lsx_vmin_w:
5799 case Intrinsic::loongarch_lsx_vmin_d:
5800 case Intrinsic::loongarch_lasx_xvmin_b:
5801 case Intrinsic::loongarch_lasx_xvmin_h:
5802 case Intrinsic::loongarch_lasx_xvmin_w:
5803 case Intrinsic::loongarch_lasx_xvmin_d:
5804 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5805 N->getOperand(2));
5806 case Intrinsic::loongarch_lsx_vmin_bu:
5807 case Intrinsic::loongarch_lsx_vmin_hu:
5808 case Intrinsic::loongarch_lsx_vmin_wu:
5809 case Intrinsic::loongarch_lsx_vmin_du:
5810 case Intrinsic::loongarch_lasx_xvmin_bu:
5811 case Intrinsic::loongarch_lasx_xvmin_hu:
5812 case Intrinsic::loongarch_lasx_xvmin_wu:
5813 case Intrinsic::loongarch_lasx_xvmin_du:
5814 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5815 N->getOperand(2));
5816 case Intrinsic::loongarch_lsx_vmini_b:
5817 case Intrinsic::loongarch_lsx_vmini_h:
5818 case Intrinsic::loongarch_lsx_vmini_w:
5819 case Intrinsic::loongarch_lsx_vmini_d:
5820 case Intrinsic::loongarch_lasx_xvmini_b:
5821 case Intrinsic::loongarch_lasx_xvmini_h:
5822 case Intrinsic::loongarch_lasx_xvmini_w:
5823 case Intrinsic::loongarch_lasx_xvmini_d:
5824 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5825 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5826 case Intrinsic::loongarch_lsx_vmini_bu:
5827 case Intrinsic::loongarch_lsx_vmini_hu:
5828 case Intrinsic::loongarch_lsx_vmini_wu:
5829 case Intrinsic::loongarch_lsx_vmini_du:
5830 case Intrinsic::loongarch_lasx_xvmini_bu:
5831 case Intrinsic::loongarch_lasx_xvmini_hu:
5832 case Intrinsic::loongarch_lasx_xvmini_wu:
5833 case Intrinsic::loongarch_lasx_xvmini_du:
5834 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5835 lowerVectorSplatImm<5>(N, 2, DAG));
5836 case Intrinsic::loongarch_lsx_vmul_b:
5837 case Intrinsic::loongarch_lsx_vmul_h:
5838 case Intrinsic::loongarch_lsx_vmul_w:
5839 case Intrinsic::loongarch_lsx_vmul_d:
5840 case Intrinsic::loongarch_lasx_xvmul_b:
5841 case Intrinsic::loongarch_lasx_xvmul_h:
5842 case Intrinsic::loongarch_lasx_xvmul_w:
5843 case Intrinsic::loongarch_lasx_xvmul_d:
5844 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5845 N->getOperand(2));
5846 case Intrinsic::loongarch_lsx_vmadd_b:
5847 case Intrinsic::loongarch_lsx_vmadd_h:
5848 case Intrinsic::loongarch_lsx_vmadd_w:
5849 case Intrinsic::loongarch_lsx_vmadd_d:
5850 case Intrinsic::loongarch_lasx_xvmadd_b:
5851 case Intrinsic::loongarch_lasx_xvmadd_h:
5852 case Intrinsic::loongarch_lasx_xvmadd_w:
5853 case Intrinsic::loongarch_lasx_xvmadd_d: {
5854 EVT ResTy = N->getValueType(0);
5855 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5856 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5857 N->getOperand(3)));
5858 }
5859 case Intrinsic::loongarch_lsx_vmsub_b:
5860 case Intrinsic::loongarch_lsx_vmsub_h:
5861 case Intrinsic::loongarch_lsx_vmsub_w:
5862 case Intrinsic::loongarch_lsx_vmsub_d:
5863 case Intrinsic::loongarch_lasx_xvmsub_b:
5864 case Intrinsic::loongarch_lasx_xvmsub_h:
5865 case Intrinsic::loongarch_lasx_xvmsub_w:
5866 case Intrinsic::loongarch_lasx_xvmsub_d: {
5867 EVT ResTy = N->getValueType(0);
5868 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5869 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5870 N->getOperand(3)));
5871 }
5872 case Intrinsic::loongarch_lsx_vdiv_b:
5873 case Intrinsic::loongarch_lsx_vdiv_h:
5874 case Intrinsic::loongarch_lsx_vdiv_w:
5875 case Intrinsic::loongarch_lsx_vdiv_d:
5876 case Intrinsic::loongarch_lasx_xvdiv_b:
5877 case Intrinsic::loongarch_lasx_xvdiv_h:
5878 case Intrinsic::loongarch_lasx_xvdiv_w:
5879 case Intrinsic::loongarch_lasx_xvdiv_d:
5880 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5881 N->getOperand(2));
5882 case Intrinsic::loongarch_lsx_vdiv_bu:
5883 case Intrinsic::loongarch_lsx_vdiv_hu:
5884 case Intrinsic::loongarch_lsx_vdiv_wu:
5885 case Intrinsic::loongarch_lsx_vdiv_du:
5886 case Intrinsic::loongarch_lasx_xvdiv_bu:
5887 case Intrinsic::loongarch_lasx_xvdiv_hu:
5888 case Intrinsic::loongarch_lasx_xvdiv_wu:
5889 case Intrinsic::loongarch_lasx_xvdiv_du:
5890 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5891 N->getOperand(2));
5892 case Intrinsic::loongarch_lsx_vmod_b:
5893 case Intrinsic::loongarch_lsx_vmod_h:
5894 case Intrinsic::loongarch_lsx_vmod_w:
5895 case Intrinsic::loongarch_lsx_vmod_d:
5896 case Intrinsic::loongarch_lasx_xvmod_b:
5897 case Intrinsic::loongarch_lasx_xvmod_h:
5898 case Intrinsic::loongarch_lasx_xvmod_w:
5899 case Intrinsic::loongarch_lasx_xvmod_d:
5900 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
5901 N->getOperand(2));
5902 case Intrinsic::loongarch_lsx_vmod_bu:
5903 case Intrinsic::loongarch_lsx_vmod_hu:
5904 case Intrinsic::loongarch_lsx_vmod_wu:
5905 case Intrinsic::loongarch_lsx_vmod_du:
5906 case Intrinsic::loongarch_lasx_xvmod_bu:
5907 case Intrinsic::loongarch_lasx_xvmod_hu:
5908 case Intrinsic::loongarch_lasx_xvmod_wu:
5909 case Intrinsic::loongarch_lasx_xvmod_du:
5910 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
5911 N->getOperand(2));
5912 case Intrinsic::loongarch_lsx_vand_v:
5913 case Intrinsic::loongarch_lasx_xvand_v:
5914 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5915 N->getOperand(2));
5916 case Intrinsic::loongarch_lsx_vor_v:
5917 case Intrinsic::loongarch_lasx_xvor_v:
5918 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5919 N->getOperand(2));
5920 case Intrinsic::loongarch_lsx_vxor_v:
5921 case Intrinsic::loongarch_lasx_xvxor_v:
5922 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5923 N->getOperand(2));
5924 case Intrinsic::loongarch_lsx_vnor_v:
5925 case Intrinsic::loongarch_lasx_xvnor_v: {
5926 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5927 N->getOperand(2));
5928 return DAG.getNOT(DL, Res, Res->getValueType(0));
5929 }
5930 case Intrinsic::loongarch_lsx_vandi_b:
5931 case Intrinsic::loongarch_lasx_xvandi_b:
5932 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5933 lowerVectorSplatImm<8>(N, 2, DAG));
5934 case Intrinsic::loongarch_lsx_vori_b:
5935 case Intrinsic::loongarch_lasx_xvori_b:
5936 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5937 lowerVectorSplatImm<8>(N, 2, DAG));
5938 case Intrinsic::loongarch_lsx_vxori_b:
5939 case Intrinsic::loongarch_lasx_xvxori_b:
5940 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5941 lowerVectorSplatImm<8>(N, 2, DAG));
5942 case Intrinsic::loongarch_lsx_vsll_b:
5943 case Intrinsic::loongarch_lsx_vsll_h:
5944 case Intrinsic::loongarch_lsx_vsll_w:
5945 case Intrinsic::loongarch_lsx_vsll_d:
5946 case Intrinsic::loongarch_lasx_xvsll_b:
5947 case Intrinsic::loongarch_lasx_xvsll_h:
5948 case Intrinsic::loongarch_lasx_xvsll_w:
5949 case Intrinsic::loongarch_lasx_xvsll_d:
5950 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5951 truncateVecElts(N, DAG));
5952 case Intrinsic::loongarch_lsx_vslli_b:
5953 case Intrinsic::loongarch_lasx_xvslli_b:
5954 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5955 lowerVectorSplatImm<3>(N, 2, DAG));
5956 case Intrinsic::loongarch_lsx_vslli_h:
5957 case Intrinsic::loongarch_lasx_xvslli_h:
5958 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5959 lowerVectorSplatImm<4>(N, 2, DAG));
5960 case Intrinsic::loongarch_lsx_vslli_w:
5961 case Intrinsic::loongarch_lasx_xvslli_w:
5962 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5963 lowerVectorSplatImm<5>(N, 2, DAG));
5964 case Intrinsic::loongarch_lsx_vslli_d:
5965 case Intrinsic::loongarch_lasx_xvslli_d:
5966 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5967 lowerVectorSplatImm<6>(N, 2, DAG));
5968 case Intrinsic::loongarch_lsx_vsrl_b:
5969 case Intrinsic::loongarch_lsx_vsrl_h:
5970 case Intrinsic::loongarch_lsx_vsrl_w:
5971 case Intrinsic::loongarch_lsx_vsrl_d:
5972 case Intrinsic::loongarch_lasx_xvsrl_b:
5973 case Intrinsic::loongarch_lasx_xvsrl_h:
5974 case Intrinsic::loongarch_lasx_xvsrl_w:
5975 case Intrinsic::loongarch_lasx_xvsrl_d:
5976 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5977 truncateVecElts(N, DAG));
5978 case Intrinsic::loongarch_lsx_vsrli_b:
5979 case Intrinsic::loongarch_lasx_xvsrli_b:
5980 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5981 lowerVectorSplatImm<3>(N, 2, DAG));
5982 case Intrinsic::loongarch_lsx_vsrli_h:
5983 case Intrinsic::loongarch_lasx_xvsrli_h:
5984 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5985 lowerVectorSplatImm<4>(N, 2, DAG));
5986 case Intrinsic::loongarch_lsx_vsrli_w:
5987 case Intrinsic::loongarch_lasx_xvsrli_w:
5988 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5989 lowerVectorSplatImm<5>(N, 2, DAG));
5990 case Intrinsic::loongarch_lsx_vsrli_d:
5991 case Intrinsic::loongarch_lasx_xvsrli_d:
5992 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5993 lowerVectorSplatImm<6>(N, 2, DAG));
5994 case Intrinsic::loongarch_lsx_vsra_b:
5995 case Intrinsic::loongarch_lsx_vsra_h:
5996 case Intrinsic::loongarch_lsx_vsra_w:
5997 case Intrinsic::loongarch_lsx_vsra_d:
5998 case Intrinsic::loongarch_lasx_xvsra_b:
5999 case Intrinsic::loongarch_lasx_xvsra_h:
6000 case Intrinsic::loongarch_lasx_xvsra_w:
6001 case Intrinsic::loongarch_lasx_xvsra_d:
6002 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6003 truncateVecElts(N, DAG));
6004 case Intrinsic::loongarch_lsx_vsrai_b:
6005 case Intrinsic::loongarch_lasx_xvsrai_b:
6006 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6007 lowerVectorSplatImm<3>(N, 2, DAG));
6008 case Intrinsic::loongarch_lsx_vsrai_h:
6009 case Intrinsic::loongarch_lasx_xvsrai_h:
6010 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6011 lowerVectorSplatImm<4>(N, 2, DAG));
6012 case Intrinsic::loongarch_lsx_vsrai_w:
6013 case Intrinsic::loongarch_lasx_xvsrai_w:
6014 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6015 lowerVectorSplatImm<5>(N, 2, DAG));
6016 case Intrinsic::loongarch_lsx_vsrai_d:
6017 case Intrinsic::loongarch_lasx_xvsrai_d:
6018 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6019 lowerVectorSplatImm<6>(N, 2, DAG));
6020 case Intrinsic::loongarch_lsx_vclz_b:
6021 case Intrinsic::loongarch_lsx_vclz_h:
6022 case Intrinsic::loongarch_lsx_vclz_w:
6023 case Intrinsic::loongarch_lsx_vclz_d:
6024 case Intrinsic::loongarch_lasx_xvclz_b:
6025 case Intrinsic::loongarch_lasx_xvclz_h:
6026 case Intrinsic::loongarch_lasx_xvclz_w:
6027 case Intrinsic::loongarch_lasx_xvclz_d:
6028 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6029 case Intrinsic::loongarch_lsx_vpcnt_b:
6030 case Intrinsic::loongarch_lsx_vpcnt_h:
6031 case Intrinsic::loongarch_lsx_vpcnt_w:
6032 case Intrinsic::loongarch_lsx_vpcnt_d:
6033 case Intrinsic::loongarch_lasx_xvpcnt_b:
6034 case Intrinsic::loongarch_lasx_xvpcnt_h:
6035 case Intrinsic::loongarch_lasx_xvpcnt_w:
6036 case Intrinsic::loongarch_lasx_xvpcnt_d:
6037 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6038 case Intrinsic::loongarch_lsx_vbitclr_b:
6039 case Intrinsic::loongarch_lsx_vbitclr_h:
6040 case Intrinsic::loongarch_lsx_vbitclr_w:
6041 case Intrinsic::loongarch_lsx_vbitclr_d:
6042 case Intrinsic::loongarch_lasx_xvbitclr_b:
6043 case Intrinsic::loongarch_lasx_xvbitclr_h:
6044 case Intrinsic::loongarch_lasx_xvbitclr_w:
6045 case Intrinsic::loongarch_lasx_xvbitclr_d:
6046 return lowerVectorBitClear(N, DAG);
6047 case Intrinsic::loongarch_lsx_vbitclri_b:
6048 case Intrinsic::loongarch_lasx_xvbitclri_b:
6049 return lowerVectorBitClearImm<3>(N, DAG);
6050 case Intrinsic::loongarch_lsx_vbitclri_h:
6051 case Intrinsic::loongarch_lasx_xvbitclri_h:
6052 return lowerVectorBitClearImm<4>(N, DAG);
6053 case Intrinsic::loongarch_lsx_vbitclri_w:
6054 case Intrinsic::loongarch_lasx_xvbitclri_w:
6055 return lowerVectorBitClearImm<5>(N, DAG);
6056 case Intrinsic::loongarch_lsx_vbitclri_d:
6057 case Intrinsic::loongarch_lasx_xvbitclri_d:
6058 return lowerVectorBitClearImm<6>(N, DAG);
6059 case Intrinsic::loongarch_lsx_vbitset_b:
6060 case Intrinsic::loongarch_lsx_vbitset_h:
6061 case Intrinsic::loongarch_lsx_vbitset_w:
6062 case Intrinsic::loongarch_lsx_vbitset_d:
6063 case Intrinsic::loongarch_lasx_xvbitset_b:
6064 case Intrinsic::loongarch_lasx_xvbitset_h:
6065 case Intrinsic::loongarch_lasx_xvbitset_w:
6066 case Intrinsic::loongarch_lasx_xvbitset_d: {
6067 EVT VecTy = N->getValueType(0);
6068 SDValue One = DAG.getConstant(1, DL, VecTy);
6069 return DAG.getNode(
6070 ISD::OR, DL, VecTy, N->getOperand(1),
6071 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6072 }
6073 case Intrinsic::loongarch_lsx_vbitseti_b:
6074 case Intrinsic::loongarch_lasx_xvbitseti_b:
6075 return lowerVectorBitSetImm<3>(N, DAG);
6076 case Intrinsic::loongarch_lsx_vbitseti_h:
6077 case Intrinsic::loongarch_lasx_xvbitseti_h:
6078 return lowerVectorBitSetImm<4>(N, DAG);
6079 case Intrinsic::loongarch_lsx_vbitseti_w:
6080 case Intrinsic::loongarch_lasx_xvbitseti_w:
6081 return lowerVectorBitSetImm<5>(N, DAG);
6082 case Intrinsic::loongarch_lsx_vbitseti_d:
6083 case Intrinsic::loongarch_lasx_xvbitseti_d:
6084 return lowerVectorBitSetImm<6>(N, DAG);
6085 case Intrinsic::loongarch_lsx_vbitrev_b:
6086 case Intrinsic::loongarch_lsx_vbitrev_h:
6087 case Intrinsic::loongarch_lsx_vbitrev_w:
6088 case Intrinsic::loongarch_lsx_vbitrev_d:
6089 case Intrinsic::loongarch_lasx_xvbitrev_b:
6090 case Intrinsic::loongarch_lasx_xvbitrev_h:
6091 case Intrinsic::loongarch_lasx_xvbitrev_w:
6092 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6093 EVT VecTy = N->getValueType(0);
6094 SDValue One = DAG.getConstant(1, DL, VecTy);
6095 return DAG.getNode(
6096 ISD::XOR, DL, VecTy, N->getOperand(1),
6097 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6098 }
6099 case Intrinsic::loongarch_lsx_vbitrevi_b:
6100 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6101 return lowerVectorBitRevImm<3>(N, DAG);
6102 case Intrinsic::loongarch_lsx_vbitrevi_h:
6103 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6104 return lowerVectorBitRevImm<4>(N, DAG);
6105 case Intrinsic::loongarch_lsx_vbitrevi_w:
6106 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6107 return lowerVectorBitRevImm<5>(N, DAG);
6108 case Intrinsic::loongarch_lsx_vbitrevi_d:
6109 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6110 return lowerVectorBitRevImm<6>(N, DAG);
6111 case Intrinsic::loongarch_lsx_vfadd_s:
6112 case Intrinsic::loongarch_lsx_vfadd_d:
6113 case Intrinsic::loongarch_lasx_xvfadd_s:
6114 case Intrinsic::loongarch_lasx_xvfadd_d:
6115 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6116 N->getOperand(2));
6117 case Intrinsic::loongarch_lsx_vfsub_s:
6118 case Intrinsic::loongarch_lsx_vfsub_d:
6119 case Intrinsic::loongarch_lasx_xvfsub_s:
6120 case Intrinsic::loongarch_lasx_xvfsub_d:
6121 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6122 N->getOperand(2));
6123 case Intrinsic::loongarch_lsx_vfmul_s:
6124 case Intrinsic::loongarch_lsx_vfmul_d:
6125 case Intrinsic::loongarch_lasx_xvfmul_s:
6126 case Intrinsic::loongarch_lasx_xvfmul_d:
6127 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6128 N->getOperand(2));
6129 case Intrinsic::loongarch_lsx_vfdiv_s:
6130 case Intrinsic::loongarch_lsx_vfdiv_d:
6131 case Intrinsic::loongarch_lasx_xvfdiv_s:
6132 case Intrinsic::loongarch_lasx_xvfdiv_d:
6133 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6134 N->getOperand(2));
6135 case Intrinsic::loongarch_lsx_vfmadd_s:
6136 case Intrinsic::loongarch_lsx_vfmadd_d:
6137 case Intrinsic::loongarch_lasx_xvfmadd_s:
6138 case Intrinsic::loongarch_lasx_xvfmadd_d:
6139 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6140 N->getOperand(2), N->getOperand(3));
6141 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6142 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6143 N->getOperand(1), N->getOperand(2),
6144 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6145 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6146 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6147 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6148 N->getOperand(1), N->getOperand(2),
6149 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6150 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6151 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6152 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6153 N->getOperand(1), N->getOperand(2),
6154 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6155 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6156 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6157 N->getOperand(1), N->getOperand(2),
6158 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6159 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6160 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6161 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6162 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6163 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6164 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6165 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6166 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6167 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6168 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6169 N->getOperand(1)));
6170 case Intrinsic::loongarch_lsx_vreplve_b:
6171 case Intrinsic::loongarch_lsx_vreplve_h:
6172 case Intrinsic::loongarch_lsx_vreplve_w:
6173 case Intrinsic::loongarch_lsx_vreplve_d:
6174 case Intrinsic::loongarch_lasx_xvreplve_b:
6175 case Intrinsic::loongarch_lasx_xvreplve_h:
6176 case Intrinsic::loongarch_lasx_xvreplve_w:
6177 case Intrinsic::loongarch_lasx_xvreplve_d:
6178 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6179 N->getOperand(1),
6180 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6181 N->getOperand(2)));
6182 }
6183 return SDValue();
6184}
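// Illustrative example: after this combine an intrinsic call such as
//   t = @llvm.loongarch.lsx.vadd.w(a, b)
// is represented by the generic node
//   t = add v4i32 a, b
// so target-independent combines and ISel patterns for ADD apply to it.
// Immediate forms (e.g. vaddi.wu) are handled the same way once their
// immediate has been splatted into a vector constant by lowerVectorSplatImm.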
6185
6186static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6187 TargetLowering::DAGCombinerInfo &DCI,
6188 const LoongArchSubtarget &Subtarget) {
6189 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6190 // conversion is unnecessary and can be replaced with the
6191 // MOVFR2GR_S_LA64 operand.
6192 SDValue Op0 = N->getOperand(0);
6193 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6194 return Op0.getOperand(0);
6195 return SDValue();
6196}
6197
6198static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6199 TargetLowering::DAGCombinerInfo &DCI,
6200 const LoongArchSubtarget &Subtarget) {
6201 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6202 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6203 // operand.
6204 SDValue Op0 = N->getOperand(0);
6205 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6206 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6207 "Unexpected value type!");
6208 return Op0.getOperand(0);
6209 }
6210 return SDValue();
6211}
6212
6213static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6214 TargetLowering::DAGCombinerInfo &DCI,
6215 const LoongArchSubtarget &Subtarget) {
6216 MVT VT = N->getSimpleValueType(0);
6217 unsigned NumBits = VT.getScalarSizeInBits();
6218
6219 // Simplify the inputs.
6220 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6221 APInt DemandedMask(APInt::getAllOnes(NumBits));
6222 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6223 return SDValue(N, 0);
6224
6225 return SDValue();
6226}
6227
6228static SDValue
6229performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6230 TargetLowering::DAGCombinerInfo &DCI,
6231 const LoongArchSubtarget &Subtarget) {
6232 SDValue Op0 = N->getOperand(0);
6233 SDLoc DL(N);
6234
6235 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6236 // redundant. Instead, use BuildPairF64's operands directly.
6237 if (Op0.getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6238 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6239
6240 if (Op0->isUndef()) {
6241 SDValue Lo = DAG.getUNDEF(MVT::i32);
6242 SDValue Hi = DAG.getUNDEF(MVT::i32);
6243 return DCI.CombineTo(N, Lo, Hi);
6244 }
6245
6246 // It's cheaper to materialise two 32-bit integers than to load a double
6247 // from the constant pool and transfer it to integer registers through the
6248 // stack.
6249 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6250 APInt V = C->getValueAPF().bitcastToAPInt();
6251 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6252 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6253 return DCI.CombineTo(N, Lo, Hi);
6254 }
6255
6256 return SDValue();
6257}
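// Illustrative example: splitting the f64 constant 1.0 on LA32,
//   bitcast(1.0) = 0x3ff0000000000000
//   Lo = 0x00000000, Hi = 0x3ff00000
// Both halves are cheap to materialize in GPRs (roughly one lu12i.w for Hi),
// avoiding a constant-pool load plus a stack round trip.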
6258
6259static SDValue
6260performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
6261 TargetLowering::DAGCombinerInfo &DCI,
6262 const LoongArchSubtarget &Subtarget) {
6263 if (!DCI.isBeforeLegalize())
6264 return SDValue();
6265
6266 MVT EltVT = N->getSimpleValueType(0);
6267 SDValue Vec = N->getOperand(0);
6268 EVT VecTy = Vec->getValueType(0);
6269 SDValue Idx = N->getOperand(1);
6270 unsigned IdxOp = Idx.getOpcode();
6271 SDLoc DL(N);
6272
6273 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6274 return SDValue();
6275
6276 // Combine:
6277 // t2 = truncate t1
6278 // t3 = {zero/sign/any}_extend t2
6279 // t4 = extract_vector_elt t0, t3
6280 // to:
6281 // t4 = extract_vector_elt t0, t1
6282 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6283 IdxOp == ISD::ANY_EXTEND) {
6284 SDValue IdxOrig = Idx.getOperand(0);
6285 if (!(IdxOrig.getOpcode() == ISD::TRUNCATE))
6286 return SDValue();
6287
6288 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6289 IdxOrig.getOperand(0));
6290 }
6291
6292 return SDValue();
6293}
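// Illustrative example: for a 256-bit vector indexed by a variable value,
//   t1 = truncate i32 idx to i8
//   t2 = zero_extend i8 t1 to i32
//   e  = extract_vector_elt v8i32 v, t2
// is rewritten by the combine above as
//   e  = extract_vector_elt v8i32 v, idx
// dropping the truncate/extend pair around the index operand.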
6294
6295SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6296 DAGCombinerInfo &DCI) const {
6297 SelectionDAG &DAG = DCI.DAG;
6298 switch (N->getOpcode()) {
6299 default:
6300 break;
6301 case ISD::AND:
6302 return performANDCombine(N, DAG, DCI, Subtarget);
6303 case ISD::OR:
6304 return performORCombine(N, DAG, DCI, Subtarget);
6305 case ISD::SETCC:
6306 return performSETCCCombine(N, DAG, DCI, Subtarget);
6307 case ISD::SRL:
6308 return performSRLCombine(N, DAG, DCI, Subtarget);
6309 case ISD::BITCAST:
6310 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6311 case LoongArchISD::BITREV_W:
6312 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6313 case LoongArchISD::BR_CC:
6314 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6315 case LoongArchISD::SELECT_CC:
6316 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6317 case ISD::INTRINSIC_WO_CHAIN:
6318 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6319 case LoongArchISD::MOVGR2FR_W_LA64:
6320 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6321 case LoongArchISD::MOVFR2GR_S_LA64:
6322 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6323 case LoongArchISD::VMSKLTZ:
6324 case LoongArchISD::XVMSKLTZ:
6325 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6326 case LoongArchISD::SPLIT_PAIR_F64:
6327 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6328 case ISD::EXTRACT_VECTOR_ELT:
6329 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6330 }
6331 return SDValue();
6332}
6333
6334static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
6335 MachineBasicBlock *MBB) {
6336 if (!ZeroDivCheck)
6337 return MBB;
6338
6339 // Build instructions:
6340 // MBB:
6341 // div(or mod) $dst, $dividend, $divisor
6342 // bne $divisor, $zero, SinkMBB
6343 // BreakMBB:
6344 // break 7 // BRK_DIVZERO
6345 // SinkMBB:
6346 // fallthrough
6347 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6348 MachineFunction::iterator It = ++MBB->getIterator();
6349 MachineFunction *MF = MBB->getParent();
6350 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6351 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6352 MF->insert(It, BreakMBB);
6353 MF->insert(It, SinkMBB);
6354
6355 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6356 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6357 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6358
6359 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6360 DebugLoc DL = MI.getDebugLoc();
6361 MachineOperand &Divisor = MI.getOperand(2);
6362 Register DivisorReg = Divisor.getReg();
6363
6364 // MBB:
6365 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6366 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6367 .addReg(LoongArch::R0)
6368 .addMBB(SinkMBB);
6369 MBB->addSuccessor(BreakMBB);
6370 MBB->addSuccessor(SinkMBB);
6371
6372 // BreakMBB:
6373 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6374 // definition of BRK_DIVZERO.
6375 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6376 BreakMBB->addSuccessor(SinkMBB);
6377
6378 // Clear Divisor's kill flag.
6379 Divisor.setIsKill(false);
6380
6381 return SinkMBB;
6382}
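// Illustrative sketch of the emitted sequence (register names are
// placeholders): with -loongarch-check-zero-division enabled, a 64-bit
// unsigned division roughly becomes
//   div.du $a0, $a0, $a1
//   bne    $a1, $zero, .LBB_sink
//   break  7                     # BRK_DIVZERO
// .LBB_sink:
//   ...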
6383
6384static MachineBasicBlock *
6385emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6386 const LoongArchSubtarget &Subtarget) {
6387 unsigned CondOpc;
6388 switch (MI.getOpcode()) {
6389 default:
6390 llvm_unreachable("Unexpected opcode");
6391 case LoongArch::PseudoVBZ:
6392 CondOpc = LoongArch::VSETEQZ_V;
6393 break;
6394 case LoongArch::PseudoVBZ_B:
6395 CondOpc = LoongArch::VSETANYEQZ_B;
6396 break;
6397 case LoongArch::PseudoVBZ_H:
6398 CondOpc = LoongArch::VSETANYEQZ_H;
6399 break;
6400 case LoongArch::PseudoVBZ_W:
6401 CondOpc = LoongArch::VSETANYEQZ_W;
6402 break;
6403 case LoongArch::PseudoVBZ_D:
6404 CondOpc = LoongArch::VSETANYEQZ_D;
6405 break;
6406 case LoongArch::PseudoVBNZ:
6407 CondOpc = LoongArch::VSETNEZ_V;
6408 break;
6409 case LoongArch::PseudoVBNZ_B:
6410 CondOpc = LoongArch::VSETALLNEZ_B;
6411 break;
6412 case LoongArch::PseudoVBNZ_H:
6413 CondOpc = LoongArch::VSETALLNEZ_H;
6414 break;
6415 case LoongArch::PseudoVBNZ_W:
6416 CondOpc = LoongArch::VSETALLNEZ_W;
6417 break;
6418 case LoongArch::PseudoVBNZ_D:
6419 CondOpc = LoongArch::VSETALLNEZ_D;
6420 break;
6421 case LoongArch::PseudoXVBZ:
6422 CondOpc = LoongArch::XVSETEQZ_V;
6423 break;
6424 case LoongArch::PseudoXVBZ_B:
6425 CondOpc = LoongArch::XVSETANYEQZ_B;
6426 break;
6427 case LoongArch::PseudoXVBZ_H:
6428 CondOpc = LoongArch::XVSETANYEQZ_H;
6429 break;
6430 case LoongArch::PseudoXVBZ_W:
6431 CondOpc = LoongArch::XVSETANYEQZ_W;
6432 break;
6433 case LoongArch::PseudoXVBZ_D:
6434 CondOpc = LoongArch::XVSETANYEQZ_D;
6435 break;
6436 case LoongArch::PseudoXVBNZ:
6437 CondOpc = LoongArch::XVSETNEZ_V;
6438 break;
6439 case LoongArch::PseudoXVBNZ_B:
6440 CondOpc = LoongArch::XVSETALLNEZ_B;
6441 break;
6442 case LoongArch::PseudoXVBNZ_H:
6443 CondOpc = LoongArch::XVSETALLNEZ_H;
6444 break;
6445 case LoongArch::PseudoXVBNZ_W:
6446 CondOpc = LoongArch::XVSETALLNEZ_W;
6447 break;
6448 case LoongArch::PseudoXVBNZ_D:
6449 CondOpc = LoongArch::XVSETALLNEZ_D;
6450 break;
6451 }
6452
6453 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6454 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6455 DebugLoc DL = MI.getDebugLoc();
6456   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6457   MachineFunction::iterator It = ++BB->getIterator();
6458
6459 MachineFunction *F = BB->getParent();
6460 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6461 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6462 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6463
6464 F->insert(It, FalseBB);
6465 F->insert(It, TrueBB);
6466 F->insert(It, SinkBB);
6467
6468 // Transfer the remainder of MBB and its successor edges to Sink.
6469 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6470   SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6471
6472 // Insert the real instruction to BB.
6473 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6474 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6475
6476 // Insert branch.
6477 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6478 BB->addSuccessor(FalseBB);
6479 BB->addSuccessor(TrueBB);
6480
6481 // FalseBB.
6482 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6483 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6484 .addReg(LoongArch::R0)
6485 .addImm(0);
6486 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6487 FalseBB->addSuccessor(SinkBB);
6488
6489 // TrueBB.
6490 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6491 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6492 .addReg(LoongArch::R0)
6493 .addImm(1);
6494 TrueBB->addSuccessor(SinkBB);
6495
6496 // SinkBB: merge the results.
6497 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6498 MI.getOperand(0).getReg())
6499 .addReg(RD1)
6500 .addMBB(FalseBB)
6501 .addReg(RD2)
6502 .addMBB(TrueBB);
6503
6504 // The pseudo instruction is gone now.
6505 MI.eraseFromParent();
6506 return SinkBB;
6507}
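// A sketch of the control flow built above (illustrative pseudo-MIR, not
// verbatim output; <CondOpc> stands for the VSET*/XVSET* opcode chosen from
// the switch):
//   BB:      $fcc = <CondOpc> $vsrc
//            bcnez $fcc, TrueBB
//   FalseBB: $rd1 = addi.w $zero, 0
//            b SinkBB
//   TrueBB:  $rd2 = addi.w $zero, 1
//   SinkBB:  $dst = PHI [$rd1, FalseBB], [$rd2, TrueBB]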
6508
6509static MachineBasicBlock *
6510 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
6511                      const LoongArchSubtarget &Subtarget) {
6512 unsigned InsOp;
6513 unsigned BroadcastOp;
6514 unsigned HalfSize;
6515 switch (MI.getOpcode()) {
6516 default:
6517 llvm_unreachable("Unexpected opcode");
6518 case LoongArch::PseudoXVINSGR2VR_B:
6519 HalfSize = 16;
6520 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6521 InsOp = LoongArch::XVEXTRINS_B;
6522 break;
6523 case LoongArch::PseudoXVINSGR2VR_H:
6524 HalfSize = 8;
6525 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6526 InsOp = LoongArch::XVEXTRINS_H;
6527 break;
6528 }
6529 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6530 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6531 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6532 DebugLoc DL = MI.getDebugLoc();
6533   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6534   // XDst = vector_insert XSrc, Elt, Idx
6535 Register XDst = MI.getOperand(0).getReg();
6536 Register XSrc = MI.getOperand(1).getReg();
6537 Register Elt = MI.getOperand(2).getReg();
6538 unsigned Idx = MI.getOperand(3).getImm();
6539
6540 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6541 Idx < HalfSize) {
6542 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6543 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6544
6545 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6546 .addReg(XSrc, 0, LoongArch::sub_128);
6547 BuildMI(*BB, MI, DL,
6548 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6549 : LoongArch::VINSGR2VR_B),
6550 ScratchSubReg2)
6551 .addReg(ScratchSubReg1)
6552 .addReg(Elt)
6553 .addImm(Idx);
6554
6555 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6556 .addImm(0)
6557 .addReg(ScratchSubReg2)
6558 .addImm(LoongArch::sub_128);
6559 } else {
6560 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6561 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6562
6563 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6564
6565 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6566 .addReg(ScratchReg1)
6567 .addReg(XSrc)
6568 .addImm(Idx >= HalfSize ? 48 : 18);
6569
6570 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6571 .addReg(XSrc)
6572 .addReg(ScratchReg2)
6573 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6574 }
6575
6576 MI.eraseFromParent();
6577 return BB;
6578}
6579
6580 static MachineBasicBlock *
6581 emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB,
6582                 const LoongArchSubtarget &Subtarget) {
6583 assert(Subtarget.hasExtLSX());
6584 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6585 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6586 DebugLoc DL = MI.getDebugLoc();
6587   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6588   Register Dst = MI.getOperand(0).getReg();
6589 Register Src = MI.getOperand(1).getReg();
6590 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6591 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6592 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6593
6594 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6595 BuildMI(*BB, MI, DL,
6596 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6597 : LoongArch::VINSGR2VR_W),
6598 ScratchReg2)
6599 .addReg(ScratchReg1)
6600 .addReg(Src)
6601 .addImm(0);
6602 BuildMI(
6603 *BB, MI, DL,
6604 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6605 ScratchReg3)
6606 .addReg(ScratchReg2);
6607 BuildMI(*BB, MI, DL,
6608 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6609 : LoongArch::VPICKVE2GR_W),
6610 Dst)
6611 .addReg(ScratchReg3)
6612 .addImm(0);
6613
6614 MI.eraseFromParent();
6615 return BB;
6616}
6617
6618static MachineBasicBlock *
6619 emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6620                    const LoongArchSubtarget &Subtarget) {
6621 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6622 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6623 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6624   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6625   Register Dst = MI.getOperand(0).getReg();
6626 Register Src = MI.getOperand(1).getReg();
6627 DebugLoc DL = MI.getDebugLoc();
6628 unsigned EleBits = 8;
6629 unsigned NotOpc = 0;
6630 unsigned MskOpc;
6631
6632 switch (MI.getOpcode()) {
6633 default:
6634 llvm_unreachable("Unexpected opcode");
6635 case LoongArch::PseudoVMSKLTZ_B:
6636 MskOpc = LoongArch::VMSKLTZ_B;
6637 break;
6638 case LoongArch::PseudoVMSKLTZ_H:
6639 MskOpc = LoongArch::VMSKLTZ_H;
6640 EleBits = 16;
6641 break;
6642 case LoongArch::PseudoVMSKLTZ_W:
6643 MskOpc = LoongArch::VMSKLTZ_W;
6644 EleBits = 32;
6645 break;
6646 case LoongArch::PseudoVMSKLTZ_D:
6647 MskOpc = LoongArch::VMSKLTZ_D;
6648 EleBits = 64;
6649 break;
6650 case LoongArch::PseudoVMSKGEZ_B:
6651 MskOpc = LoongArch::VMSKGEZ_B;
6652 break;
6653 case LoongArch::PseudoVMSKEQZ_B:
6654 MskOpc = LoongArch::VMSKNZ_B;
6655 NotOpc = LoongArch::VNOR_V;
6656 break;
6657 case LoongArch::PseudoVMSKNEZ_B:
6658 MskOpc = LoongArch::VMSKNZ_B;
6659 break;
6660 case LoongArch::PseudoXVMSKLTZ_B:
6661 MskOpc = LoongArch::XVMSKLTZ_B;
6662 RC = &LoongArch::LASX256RegClass;
6663 break;
6664 case LoongArch::PseudoXVMSKLTZ_H:
6665 MskOpc = LoongArch::XVMSKLTZ_H;
6666 RC = &LoongArch::LASX256RegClass;
6667 EleBits = 16;
6668 break;
6669 case LoongArch::PseudoXVMSKLTZ_W:
6670 MskOpc = LoongArch::XVMSKLTZ_W;
6671 RC = &LoongArch::LASX256RegClass;
6672 EleBits = 32;
6673 break;
6674 case LoongArch::PseudoXVMSKLTZ_D:
6675 MskOpc = LoongArch::XVMSKLTZ_D;
6676 RC = &LoongArch::LASX256RegClass;
6677 EleBits = 64;
6678 break;
6679 case LoongArch::PseudoXVMSKGEZ_B:
6680 MskOpc = LoongArch::XVMSKGEZ_B;
6681 RC = &LoongArch::LASX256RegClass;
6682 break;
6683 case LoongArch::PseudoXVMSKEQZ_B:
6684 MskOpc = LoongArch::XVMSKNZ_B;
6685 NotOpc = LoongArch::XVNOR_V;
6686 RC = &LoongArch::LASX256RegClass;
6687 break;
6688 case LoongArch::PseudoXVMSKNEZ_B:
6689 MskOpc = LoongArch::XVMSKNZ_B;
6690 RC = &LoongArch::LASX256RegClass;
6691 break;
6692 }
6693
6694 Register Msk = MRI.createVirtualRegister(RC);
6695 if (NotOpc) {
6696 Register Tmp = MRI.createVirtualRegister(RC);
6697 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6698 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6699 .addReg(Tmp, RegState::Kill)
6700 .addReg(Tmp, RegState::Kill);
6701 } else {
6702 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6703 }
6704
6705 if (TRI->getRegSizeInBits(*RC) > 128) {
6706 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6707 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6708 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6709 .addReg(Msk)
6710 .addImm(0);
6711 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6712 .addReg(Msk, RegState::Kill)
6713 .addImm(4);
6714 BuildMI(*BB, MI, DL,
6715 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6716 : LoongArch::BSTRINS_W),
6717 Dst)
6718         .addReg(Lo, RegState::Kill)
6719         .addReg(Hi, RegState::Kill)
6720         .addImm(256 / EleBits - 1)
6721 .addImm(128 / EleBits);
6722 } else {
6723 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6724 .addReg(Msk, RegState::Kill)
6725 .addImm(0);
6726 }
6727
6728 MI.eraseFromParent();
6729 return BB;
6730}
6731
6732static MachineBasicBlock *
6733 emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6734                        const LoongArchSubtarget &Subtarget) {
6735 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6736 "Unexpected instruction");
6737
6738 MachineFunction &MF = *BB->getParent();
6739 DebugLoc DL = MI.getDebugLoc();
6740   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6741   Register LoReg = MI.getOperand(0).getReg();
6742 Register HiReg = MI.getOperand(1).getReg();
6743 Register SrcReg = MI.getOperand(2).getReg();
6744
6745 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6746 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6747 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6748 MI.eraseFromParent(); // The pseudo instruction is gone now.
6749 return BB;
6750}
6751
6752static MachineBasicBlock *
6753 emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6754                        const LoongArchSubtarget &Subtarget) {
6755 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6756 "Unexpected instruction");
6757
6758 MachineFunction &MF = *BB->getParent();
6759 DebugLoc DL = MI.getDebugLoc();
6760   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6761   MachineRegisterInfo &MRI = MF.getRegInfo();
6762   Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6763 Register DstReg = MI.getOperand(0).getReg();
6764 Register LoReg = MI.getOperand(1).getReg();
6765 Register HiReg = MI.getOperand(2).getReg();
6766
6767 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6768 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6769 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6770 .addReg(TmpReg, RegState::Kill)
6771 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6772 MI.eraseFromParent(); // The pseudo instruction is gone now.
6773 return BB;
6774}
6775
6776 static bool isSelectPseudo(MachineInstr &MI) {
6777   switch (MI.getOpcode()) {
6778 default:
6779 return false;
6780 case LoongArch::Select_GPR_Using_CC_GPR:
6781 return true;
6782 }
6783}
6784
6785static MachineBasicBlock *
6786 emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6787                  const LoongArchSubtarget &Subtarget) {
6788 // To "insert" Select_* instructions, we actually have to insert the triangle
6789 // control-flow pattern. The incoming instructions know the destination vreg
6790 // to set, the condition code register to branch on, the true/false values to
6791 // select between, and the condcode to use to select the appropriate branch.
6792 //
6793 // We produce the following control flow:
6794 // HeadMBB
6795 // | \
6796 // | IfFalseMBB
6797 // | /
6798 // TailMBB
6799 //
6800 // When we find a sequence of selects we attempt to optimize their emission
6801 // by sharing the control flow. Currently we only handle cases where we have
6802 // multiple selects with the exact same condition (same LHS, RHS and CC).
6803 // The selects may be interleaved with other instructions if the other
6804 // instructions meet some requirements we deem safe:
6805 // - They are not pseudo instructions.
6806 // - They are debug instructions. Otherwise,
6807 // - They do not have side-effects, do not access memory and their inputs do
6808 // not depend on the results of the select pseudo-instructions.
6809 // The TrueV/FalseV operands of the selects cannot depend on the result of
6810 // previous selects in the sequence.
6811 // These conditions could be further relaxed. See the X86 target for a
6812 // related approach and more information.
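// As a concrete illustration (hypothetical operands, not taken from a test),
// two selects guarded by the same comparison
//   %x = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t0, %f0
//   %y = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t1, %f1
// share the single conditional branch emitted in HeadMBB and are rewritten
// into two PHIs in TailMBB:
//   %x = PHI [ %t0, HeadMBB ], [ %f0, IfFalseMBB ]
//   %y = PHI [ %t1, HeadMBB ], [ %f1, IfFalseMBB ]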
6813
6814 Register LHS = MI.getOperand(1).getReg();
6815 Register RHS;
6816 if (MI.getOperand(2).isReg())
6817 RHS = MI.getOperand(2).getReg();
6818 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6819
6820 SmallVector<MachineInstr *, 4> SelectDebugValues;
6821 SmallSet<Register, 4> SelectDests;
6822 SelectDests.insert(MI.getOperand(0).getReg());
6823
6824 MachineInstr *LastSelectPseudo = &MI;
6825 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6826 SequenceMBBI != E; ++SequenceMBBI) {
6827 if (SequenceMBBI->isDebugInstr())
6828 continue;
6829 if (isSelectPseudo(*SequenceMBBI)) {
6830 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6831 !SequenceMBBI->getOperand(2).isReg() ||
6832 SequenceMBBI->getOperand(2).getReg() != RHS ||
6833 SequenceMBBI->getOperand(3).getImm() != CC ||
6834 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6835 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6836 break;
6837 LastSelectPseudo = &*SequenceMBBI;
6838 SequenceMBBI->collectDebugValues(SelectDebugValues);
6839 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6840 continue;
6841 }
6842 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6843 SequenceMBBI->mayLoadOrStore() ||
6844 SequenceMBBI->usesCustomInsertionHook())
6845 break;
6846 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6847 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6848 }))
6849 break;
6850 }
6851
6852 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6853 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6854 DebugLoc DL = MI.getDebugLoc();
6855   MachineFunction::iterator I = ++BB->getIterator();
6856
6857 MachineBasicBlock *HeadMBB = BB;
6858 MachineFunction *F = BB->getParent();
6859 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6860 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6861
6862 F->insert(I, IfFalseMBB);
6863 F->insert(I, TailMBB);
6864
6865 // Set the call frame size on entry to the new basic blocks.
6866 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6867 IfFalseMBB->setCallFrameSize(CallFrameSize);
6868 TailMBB->setCallFrameSize(CallFrameSize);
6869
6870 // Transfer debug instructions associated with the selects to TailMBB.
6871 for (MachineInstr *DebugInstr : SelectDebugValues) {
6872 TailMBB->push_back(DebugInstr->removeFromParent());
6873 }
6874
6875 // Move all instructions after the sequence to TailMBB.
6876 TailMBB->splice(TailMBB->end(), HeadMBB,
6877 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6878 // Update machine-CFG edges by transferring all successors of the current
6879 // block to the new block which will contain the Phi nodes for the selects.
6880 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6881 // Set the successors for HeadMBB.
6882 HeadMBB->addSuccessor(IfFalseMBB);
6883 HeadMBB->addSuccessor(TailMBB);
6884
6885 // Insert appropriate branch.
6886 if (MI.getOperand(2).isImm())
6887 BuildMI(HeadMBB, DL, TII.get(CC))
6888 .addReg(LHS)
6889 .addImm(MI.getOperand(2).getImm())
6890 .addMBB(TailMBB);
6891 else
6892 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6893
6894 // IfFalseMBB just falls through to TailMBB.
6895 IfFalseMBB->addSuccessor(TailMBB);
6896
6897 // Create PHIs for all of the select pseudo-instructions.
6898 auto SelectMBBI = MI.getIterator();
6899 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
6900 auto InsertionPoint = TailMBB->begin();
6901 while (SelectMBBI != SelectEnd) {
6902 auto Next = std::next(SelectMBBI);
6903 if (isSelectPseudo(*SelectMBBI)) {
6904 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6905 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
6906 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
6907 .addReg(SelectMBBI->getOperand(4).getReg())
6908 .addMBB(HeadMBB)
6909 .addReg(SelectMBBI->getOperand(5).getReg())
6910 .addMBB(IfFalseMBB);
6911 SelectMBBI->eraseFromParent();
6912 }
6913 SelectMBBI = Next;
6914 }
6915
6916 F->getProperties().resetNoPHIs();
6917 return TailMBB;
6918}
6919
6920MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6921 MachineInstr &MI, MachineBasicBlock *BB) const {
6922 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6923 DebugLoc DL = MI.getDebugLoc();
6924
6925 switch (MI.getOpcode()) {
6926 default:
6927 llvm_unreachable("Unexpected instr type to insert");
6928 case LoongArch::DIV_W:
6929 case LoongArch::DIV_WU:
6930 case LoongArch::MOD_W:
6931 case LoongArch::MOD_WU:
6932 case LoongArch::DIV_D:
6933 case LoongArch::DIV_DU:
6934 case LoongArch::MOD_D:
6935 case LoongArch::MOD_DU:
6936 return insertDivByZeroTrap(MI, BB);
6937 break;
6938 case LoongArch::WRFCSR: {
6939 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
6940 LoongArch::FCSR0 + MI.getOperand(0).getImm())
6941 .addReg(MI.getOperand(1).getReg());
6942 MI.eraseFromParent();
6943 return BB;
6944 }
6945 case LoongArch::RDFCSR: {
6946 MachineInstr *ReadFCSR =
6947 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
6948 MI.getOperand(0).getReg())
6949 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
6950 ReadFCSR->getOperand(1).setIsUndef();
6951 MI.eraseFromParent();
6952 return BB;
6953 }
6954 case LoongArch::Select_GPR_Using_CC_GPR:
6955 return emitSelectPseudo(MI, BB, Subtarget);
6956 case LoongArch::BuildPairF64Pseudo:
6957 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
6958 case LoongArch::SplitPairF64Pseudo:
6959 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
6960 case LoongArch::PseudoVBZ:
6961 case LoongArch::PseudoVBZ_B:
6962 case LoongArch::PseudoVBZ_H:
6963 case LoongArch::PseudoVBZ_W:
6964 case LoongArch::PseudoVBZ_D:
6965 case LoongArch::PseudoVBNZ:
6966 case LoongArch::PseudoVBNZ_B:
6967 case LoongArch::PseudoVBNZ_H:
6968 case LoongArch::PseudoVBNZ_W:
6969 case LoongArch::PseudoVBNZ_D:
6970 case LoongArch::PseudoXVBZ:
6971 case LoongArch::PseudoXVBZ_B:
6972 case LoongArch::PseudoXVBZ_H:
6973 case LoongArch::PseudoXVBZ_W:
6974 case LoongArch::PseudoXVBZ_D:
6975 case LoongArch::PseudoXVBNZ:
6976 case LoongArch::PseudoXVBNZ_B:
6977 case LoongArch::PseudoXVBNZ_H:
6978 case LoongArch::PseudoXVBNZ_W:
6979 case LoongArch::PseudoXVBNZ_D:
6980 return emitVecCondBranchPseudo(MI, BB, Subtarget);
6981 case LoongArch::PseudoXVINSGR2VR_B:
6982 case LoongArch::PseudoXVINSGR2VR_H:
6983 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
6984 case LoongArch::PseudoCTPOP:
6985 return emitPseudoCTPOP(MI, BB, Subtarget);
6986 case LoongArch::PseudoVMSKLTZ_B:
6987 case LoongArch::PseudoVMSKLTZ_H:
6988 case LoongArch::PseudoVMSKLTZ_W:
6989 case LoongArch::PseudoVMSKLTZ_D:
6990 case LoongArch::PseudoVMSKGEZ_B:
6991 case LoongArch::PseudoVMSKEQZ_B:
6992 case LoongArch::PseudoVMSKNEZ_B:
6993 case LoongArch::PseudoXVMSKLTZ_B:
6994 case LoongArch::PseudoXVMSKLTZ_H:
6995 case LoongArch::PseudoXVMSKLTZ_W:
6996 case LoongArch::PseudoXVMSKLTZ_D:
6997 case LoongArch::PseudoXVMSKGEZ_B:
6998 case LoongArch::PseudoXVMSKEQZ_B:
6999 case LoongArch::PseudoXVMSKNEZ_B:
7000 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7001 case TargetOpcode::STATEPOINT:
7002 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7003 // while bl call instruction (where statepoint will be lowered at the
7004 // end) has implicit def. This def is early-clobber as it will be set at
7005 // the moment of the call and earlier than any use is read.
7006 // Add this implicit dead def here as a workaround.
7007 MI.addOperand(*MI.getMF(),
7008                  MachineOperand::CreateReg(
7009                      LoongArch::R1, /*isDef*/ true,
7010 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7011 /*isUndef*/ false, /*isEarlyClobber*/ true));
7012 if (!Subtarget.is64Bit())
7013 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7014 return emitPatchPoint(MI, BB);
7015 }
7016}
7017
7018 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
7019     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7020 unsigned *Fast) const {
7021 if (!Subtarget.hasUAL())
7022 return false;
7023
7024 // TODO: set reasonable speed number.
7025 if (Fast)
7026 *Fast = 1;
7027 return true;
7028}
7029
7030const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7031 switch ((LoongArchISD::NodeType)Opcode) {
7032   case LoongArchISD::FIRST_NUMBER:
7033     break;
7034
7035#define NODE_NAME_CASE(node) \
7036 case LoongArchISD::node: \
7037 return "LoongArchISD::" #node;
7038
7039 // TODO: Add more target-dependent nodes later.
7040 NODE_NAME_CASE(CALL)
7041 NODE_NAME_CASE(CALL_MEDIUM)
7042 NODE_NAME_CASE(CALL_LARGE)
7043 NODE_NAME_CASE(RET)
7044 NODE_NAME_CASE(TAIL)
7045 NODE_NAME_CASE(TAIL_MEDIUM)
7046 NODE_NAME_CASE(TAIL_LARGE)
7047 NODE_NAME_CASE(SELECT_CC)
7048 NODE_NAME_CASE(BR_CC)
7049 NODE_NAME_CASE(BRCOND)
7050 NODE_NAME_CASE(SLL_W)
7051 NODE_NAME_CASE(SRA_W)
7052 NODE_NAME_CASE(SRL_W)
7053 NODE_NAME_CASE(BSTRINS)
7054 NODE_NAME_CASE(BSTRPICK)
7055 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7056 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7057 NODE_NAME_CASE(FTINT)
7058 NODE_NAME_CASE(BUILD_PAIR_F64)
7059 NODE_NAME_CASE(SPLIT_PAIR_F64)
7060 NODE_NAME_CASE(REVB_2H)
7061 NODE_NAME_CASE(REVB_2W)
7062 NODE_NAME_CASE(BITREV_4B)
7063 NODE_NAME_CASE(BITREV_8B)
7064 NODE_NAME_CASE(BITREV_W)
7065 NODE_NAME_CASE(ROTR_W)
7066 NODE_NAME_CASE(ROTL_W)
7067 NODE_NAME_CASE(DIV_W)
7068 NODE_NAME_CASE(DIV_WU)
7069 NODE_NAME_CASE(MOD_W)
7070 NODE_NAME_CASE(MOD_WU)
7071 NODE_NAME_CASE(CLZ_W)
7072 NODE_NAME_CASE(CTZ_W)
7073 NODE_NAME_CASE(DBAR)
7074 NODE_NAME_CASE(IBAR)
7075 NODE_NAME_CASE(BREAK)
7076 NODE_NAME_CASE(SYSCALL)
7077 NODE_NAME_CASE(CRC_W_B_W)
7078 NODE_NAME_CASE(CRC_W_H_W)
7079 NODE_NAME_CASE(CRC_W_W_W)
7080 NODE_NAME_CASE(CRC_W_D_W)
7081 NODE_NAME_CASE(CRCC_W_B_W)
7082 NODE_NAME_CASE(CRCC_W_H_W)
7083 NODE_NAME_CASE(CRCC_W_W_W)
7084 NODE_NAME_CASE(CRCC_W_D_W)
7085 NODE_NAME_CASE(CSRRD)
7086 NODE_NAME_CASE(CSRWR)
7087 NODE_NAME_CASE(CSRXCHG)
7088 NODE_NAME_CASE(IOCSRRD_B)
7089 NODE_NAME_CASE(IOCSRRD_H)
7090 NODE_NAME_CASE(IOCSRRD_W)
7091 NODE_NAME_CASE(IOCSRRD_D)
7092 NODE_NAME_CASE(IOCSRWR_B)
7093 NODE_NAME_CASE(IOCSRWR_H)
7094 NODE_NAME_CASE(IOCSRWR_W)
7095 NODE_NAME_CASE(IOCSRWR_D)
7096 NODE_NAME_CASE(CPUCFG)
7097 NODE_NAME_CASE(MOVGR2FCSR)
7098 NODE_NAME_CASE(MOVFCSR2GR)
7099 NODE_NAME_CASE(CACOP_D)
7100 NODE_NAME_CASE(CACOP_W)
7101 NODE_NAME_CASE(VSHUF)
7102 NODE_NAME_CASE(VPICKEV)
7103 NODE_NAME_CASE(VPICKOD)
7104 NODE_NAME_CASE(VPACKEV)
7105 NODE_NAME_CASE(VPACKOD)
7106 NODE_NAME_CASE(VILVL)
7107 NODE_NAME_CASE(VILVH)
7108 NODE_NAME_CASE(VSHUF4I)
7109 NODE_NAME_CASE(VREPLVEI)
7110 NODE_NAME_CASE(VREPLGR2VR)
7111 NODE_NAME_CASE(XVPERMI)
7112 NODE_NAME_CASE(XVPERM)
7113 NODE_NAME_CASE(VPICK_SEXT_ELT)
7114 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7115 NODE_NAME_CASE(VREPLVE)
7116 NODE_NAME_CASE(VALL_ZERO)
7117 NODE_NAME_CASE(VANY_ZERO)
7118 NODE_NAME_CASE(VALL_NONZERO)
7119 NODE_NAME_CASE(VANY_NONZERO)
7120 NODE_NAME_CASE(FRECIPE)
7121 NODE_NAME_CASE(FRSQRTE)
7122 NODE_NAME_CASE(VSLLI)
7123 NODE_NAME_CASE(VSRLI)
7124 NODE_NAME_CASE(VBSLL)
7125 NODE_NAME_CASE(VBSRL)
7126 NODE_NAME_CASE(VLDREPL)
7127 NODE_NAME_CASE(VMSKLTZ)
7128 NODE_NAME_CASE(VMSKGEZ)
7129 NODE_NAME_CASE(VMSKEQZ)
7130 NODE_NAME_CASE(VMSKNEZ)
7131 NODE_NAME_CASE(XVMSKLTZ)
7132 NODE_NAME_CASE(XVMSKGEZ)
7133 NODE_NAME_CASE(XVMSKEQZ)
7134 NODE_NAME_CASE(XVMSKNEZ)
7135 NODE_NAME_CASE(VHADDW)
7136 }
7137#undef NODE_NAME_CASE
7138 return nullptr;
7139}
7140
7141//===----------------------------------------------------------------------===//
7142// Calling Convention Implementation
7143//===----------------------------------------------------------------------===//
7144
7145 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
7146// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7147// fixed-point arguments, and floating-point arguments when no FPR is available
7148// or with soft float ABI.
7149const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7150 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7151 LoongArch::R10, LoongArch::R11};
7152 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7153// arguments, and fa0-fa1 are also used to return values.
7154const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7155 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7156 LoongArch::F6, LoongArch::F7};
7157// FPR32 and FPR64 alias each other.
7158 const MCPhysReg ArgFPR64s[] = {
7159     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7160 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7161
7162const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7163 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7164 LoongArch::VR6, LoongArch::VR7};
7165
7166const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7167 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7168 LoongArch::XR6, LoongArch::XR7};
7169
7170// Pass a 2*GRLen argument that has been split into two GRLen values through
7171// registers or the stack as necessary.
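// For example (an illustrative sketch, not a doctest): on LA32 (GRLen == 32)
// an i64 argument legalised into two i32 halves may end up entirely in a GPR
// pair, split between the last free GPR and a stack slot, or entirely on the
// stack, depending on how many of a0-a7 are still unallocated when it is seen.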
7172static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7173 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7174 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7175 ISD::ArgFlagsTy ArgFlags2) {
7176 unsigned GRLenInBytes = GRLen / 8;
7177 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7178 // At least one half can be passed via register.
7179 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7180 VA1.getLocVT(), CCValAssign::Full));
7181 } else {
7182 // Both halves must be passed on the stack, with proper alignment.
7183 Align StackAlign =
7184 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7185 State.addLoc(
7186         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
7187                             State.AllocateStack(GRLenInBytes, StackAlign),
7188 VA1.getLocVT(), CCValAssign::Full));
7189 State.addLoc(CCValAssign::getMem(
7190 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7191 LocVT2, CCValAssign::Full));
7192 return false;
7193 }
7194 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7195 // The second half can also be passed via register.
7196 State.addLoc(
7197 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7198 } else {
7199 // The second half is passed via the stack, without additional alignment.
7200 State.addLoc(CCValAssign::getMem(
7201 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7202 LocVT2, CCValAssign::Full));
7203 }
7204 return false;
7205}
7206
7207// Implements the LoongArch calling convention. Returns true upon failure.
7208 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
7209                          unsigned ValNo, MVT ValVT,
7210 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7211 CCState &State, bool IsRet, Type *OrigTy) {
7212 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7213   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7214 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7215 MVT LocVT = ValVT;
7216
7217 // Any return value split into more than two values can't be returned
7218 // directly.
7219 if (IsRet && ValNo > 1)
7220 return true;
7221
7222 // If passing a variadic argument, or if no FPR is available.
7223 bool UseGPRForFloat = true;
7224
7225 switch (ABI) {
7226 default:
7227 llvm_unreachable("Unexpected ABI");
7228 break;
7229   case LoongArchABI::ABI_ILP32F:
7230   case LoongArchABI::ABI_LP64F:
7231   case LoongArchABI::ABI_ILP32D:
7232   case LoongArchABI::ABI_LP64D:
7233     UseGPRForFloat = ArgFlags.isVarArg();
7234     break;
7235   case LoongArchABI::ABI_ILP32S:
7236   case LoongArchABI::ABI_LP64S:
7237     break;
7238 }
7239
7240 // If this is a variadic argument, the LoongArch calling convention requires
7241 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7242 // byte alignment. An aligned register should be used regardless of whether
7243 // the original argument was split during legalisation or not. The argument
7244 // will not be passed by registers if the original type is larger than
7245 // 2*GRLen, so the register alignment rule does not apply.
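// For instance (an illustrative sketch): on LA64 a variadic argument with
// 16-byte size and alignment that arrives when only a0 has been consumed
// causes a1 to be skipped, so its two GRLen halves land in the aligned pair
// a2/a3 instead of straddling a1/a2.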
7246 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7247 if (ArgFlags.isVarArg() &&
7248 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7249 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7250 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7251 // Skip 'odd' register if necessary.
7252 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7253 State.AllocateReg(ArgGPRs);
7254 }
7255
7256 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7257 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7258 State.getPendingArgFlags();
7259
7260 assert(PendingLocs.size() == PendingArgFlags.size() &&
7261 "PendingLocs and PendingArgFlags out of sync");
7262
7263 // FPR32 and FPR64 alias each other.
7264 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7265 UseGPRForFloat = true;
7266
7267 if (UseGPRForFloat && ValVT == MVT::f32) {
7268 LocVT = GRLenVT;
7269 LocInfo = CCValAssign::BCvt;
7270 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7271 LocVT = MVT::i64;
7272 LocInfo = CCValAssign::BCvt;
7273 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7274 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7275 // registers are exhausted.
7276 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7277 // Depending on available argument GPRS, f64 may be passed in a pair of
7278 // GPRs, split between a GPR and the stack, or passed completely on the
7279 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7280 // cases.
7281 MCRegister Reg = State.AllocateReg(ArgGPRs);
7282 if (!Reg) {
7283 int64_t StackOffset = State.AllocateStack(8, Align(8));
7284 State.addLoc(
7285 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7286 return false;
7287 }
7288 LocVT = MVT::i32;
7289 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7290 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7291 if (HiReg) {
7292 State.addLoc(
7293 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7294 } else {
7295 int64_t StackOffset = State.AllocateStack(4, Align(4));
7296 State.addLoc(
7297 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7298 }
7299 return false;
7300 }
7301
7302 // Split arguments might be passed indirectly, so keep track of the pending
7303 // values.
7304 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7305 LocVT = GRLenVT;
7306 LocInfo = CCValAssign::Indirect;
7307 PendingLocs.push_back(
7308 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7309 PendingArgFlags.push_back(ArgFlags);
7310 if (!ArgFlags.isSplitEnd()) {
7311 return false;
7312 }
7313 }
7314
7315 // If the split argument only had two elements, it should be passed directly
7316 // in registers or on the stack.
7317 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7318 PendingLocs.size() <= 2) {
7319 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7320 // Apply the normal calling convention rules to the first half of the
7321 // split argument.
7322 CCValAssign VA = PendingLocs[0];
7323 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7324 PendingLocs.clear();
7325 PendingArgFlags.clear();
7326 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7327 ArgFlags);
7328 }
7329
7330 // Allocate to a register if possible, or else a stack slot.
7331 Register Reg;
7332 unsigned StoreSizeBytes = GRLen / 8;
7333 Align StackAlign = Align(GRLen / 8);
7334
7335 if (ValVT == MVT::f32 && !UseGPRForFloat)
7336 Reg = State.AllocateReg(ArgFPR32s);
7337 else if (ValVT == MVT::f64 && !UseGPRForFloat)
7338 Reg = State.AllocateReg(ArgFPR64s);
7339 else if (ValVT.is128BitVector())
7340 Reg = State.AllocateReg(ArgVRs);
7341 else if (ValVT.is256BitVector())
7342 Reg = State.AllocateReg(ArgXRs);
7343 else
7344 Reg = State.AllocateReg(ArgGPRs);
7345
7346 unsigned StackOffset =
7347 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7348
7349 // If we reach this point and PendingLocs is non-empty, we must be at the
7350 // end of a split argument that must be passed indirectly.
7351 if (!PendingLocs.empty()) {
7352 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7353 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7354 for (auto &It : PendingLocs) {
7355 if (Reg)
7356 It.convertToReg(Reg);
7357 else
7358 It.convertToMem(StackOffset);
7359 State.addLoc(It);
7360 }
7361 PendingLocs.clear();
7362 PendingArgFlags.clear();
7363 return false;
7364 }
7365 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7366          "Expected a GRLenVT at this stage");
7367
7368 if (Reg) {
7369 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7370 return false;
7371 }
7372
7373 // When a floating-point value is passed on the stack, no bit-cast is needed.
7374 if (ValVT.isFloatingPoint()) {
7375 LocVT = ValVT;
7376 LocInfo = CCValAssign::Full;
7377 }
7378
7379 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7380 return false;
7381}
7382
7383void LoongArchTargetLowering::analyzeInputArgs(
7384 MachineFunction &MF, CCState &CCInfo,
7385 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7386 LoongArchCCAssignFn Fn) const {
7387 FunctionType *FType = MF.getFunction().getFunctionType();
7388 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7389 MVT ArgVT = Ins[i].VT;
7390 Type *ArgTy = nullptr;
7391 if (IsRet)
7392 ArgTy = FType->getReturnType();
7393 else if (Ins[i].isOrigArg())
7394 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7395     LoongArchABI::ABI ABI =
7396         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7397 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7398 CCInfo, IsRet, ArgTy)) {
7399 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7400 << '\n');
7401 llvm_unreachable("");
7402 }
7403 }
7404}
7405
7406void LoongArchTargetLowering::analyzeOutputArgs(
7407 MachineFunction &MF, CCState &CCInfo,
7408 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7409 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7410 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7411 MVT ArgVT = Outs[i].VT;
7412 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7413     LoongArchABI::ABI ABI =
7414         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7415 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7416 CCInfo, IsRet, OrigTy)) {
7417 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7418 << "\n");
7419 llvm_unreachable("");
7420 }
7421 }
7422}
7423
7424// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7425// values.
7426 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
7427                                    const CCValAssign &VA, const SDLoc &DL) {
7428 switch (VA.getLocInfo()) {
7429 default:
7430 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7431 case CCValAssign::Full:
7433 break;
7434 case CCValAssign::BCvt:
7435 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7436 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7437 else
7438 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7439 break;
7440 }
7441 return Val;
7442}
7443
7444 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
7445                                 const CCValAssign &VA, const SDLoc &DL,
7446 const ISD::InputArg &In,
7447 const LoongArchTargetLowering &TLI) {
7448   MachineFunction &MF = DAG.getMachineFunction();
7449   MachineRegisterInfo &RegInfo = MF.getRegInfo();
7450   EVT LocVT = VA.getLocVT();
7451 SDValue Val;
7452 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7453 Register VReg = RegInfo.createVirtualRegister(RC);
7454 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7455 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7456
7457 // If input is sign extended from 32 bits, note it for the OptW pass.
7458 if (In.isOrigArg()) {
7459 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7460 if (OrigArg->getType()->isIntegerTy()) {
7461 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7462 // An input zero extended from i31 can also be considered sign extended.
7463 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7464 (BitWidth < 32 && In.Flags.isZExt())) {
7465         LoongArchMachineFunctionInfo *LAFI =
7466             MF.getInfo<LoongArchMachineFunctionInfo>();
7467         LAFI->addSExt32Register(VReg);
7468 }
7469 }
7470 }
7471
7472 return convertLocVTToValVT(DAG, Val, VA, DL);
7473}
7474
7475// The caller is responsible for loading the full value if the argument is
7476// passed with CCValAssign::Indirect.
7477 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
7478                                 const CCValAssign &VA, const SDLoc &DL) {
7479   MachineFunction &MF = DAG.getMachineFunction();
7480   MachineFrameInfo &MFI = MF.getFrameInfo();
7481 EVT ValVT = VA.getValVT();
7482 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7483 /*IsImmutable=*/true);
7484 SDValue FIN = DAG.getFrameIndex(
7485       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
7486
7487 ISD::LoadExtType ExtType;
7488 switch (VA.getLocInfo()) {
7489 default:
7490 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7491 case CCValAssign::Full:
7492   case CCValAssign::Indirect:
7493   case CCValAssign::BCvt:
7494 ExtType = ISD::NON_EXTLOAD;
7495 break;
7496 }
7497 return DAG.getExtLoad(
7498 ExtType, DL, VA.getLocVT(), Chain, FIN,
7499       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
7500 }
7501
7502 static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
7503                                        const CCValAssign &VA,
7504 const CCValAssign &HiVA,
7505 const SDLoc &DL) {
7506 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7507 "Unexpected VA");
7508   MachineFunction &MF = DAG.getMachineFunction();
7509   MachineFrameInfo &MFI = MF.getFrameInfo();
7510   MachineRegisterInfo &RegInfo = MF.getRegInfo();
7511
7512 assert(VA.isRegLoc() && "Expected register VA assignment");
7513
7514 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7515 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7516 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7517 SDValue Hi;
7518 if (HiVA.isMemLoc()) {
7519 // Second half of f64 is passed on the stack.
7520 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7521 /*IsImmutable=*/true);
7522 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7523 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7524                      MachinePointerInfo::getFixedStack(MF, FI));
7525   } else {
7526 // Second half of f64 is passed in another GPR.
7527 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7528 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7529 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7530 }
7531 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7532}
7533
7534 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
7535                                    const CCValAssign &VA, const SDLoc &DL) {
7536 EVT LocVT = VA.getLocVT();
7537
7538 switch (VA.getLocInfo()) {
7539 default:
7540 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7541 case CCValAssign::Full:
7542 break;
7543 case CCValAssign::BCvt:
7544 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7545 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7546 else
7547 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7548 break;
7549 }
7550 return Val;
7551}
7552
7553static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7554 CCValAssign::LocInfo LocInfo,
7555 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7556 CCState &State) {
7557 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7558 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7559 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7560 static const MCPhysReg GPRList[] = {
7561 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7562 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7563 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7564 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7565 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7566 return false;
7567 }
7568 }
7569
7570 if (LocVT == MVT::f32) {
7571 // Pass in STG registers: F1, F2, F3, F4
7572 // fs0,fs1,fs2,fs3
7573 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7574 LoongArch::F26, LoongArch::F27};
7575 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7576 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7577 return false;
7578 }
7579 }
7580
7581 if (LocVT == MVT::f64) {
7582 // Pass in STG registers: D1, D2, D3, D4
7583 // fs4,fs5,fs6,fs7
7584 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7585 LoongArch::F30_64, LoongArch::F31_64};
7586 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7587 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7588 return false;
7589 }
7590 }
7591
7592 report_fatal_error("No registers left in GHC calling convention");
7593 return true;
7594}
7595
7596// Transform physical registers into virtual registers.
7597 SDValue LoongArchTargetLowering::LowerFormalArguments(
7598     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7599 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7600 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7601
7602   MachineFunction &MF = DAG.getMachineFunction();
7603
7604 switch (CallConv) {
7605 default:
7606 llvm_unreachable("Unsupported calling convention");
7607 case CallingConv::C:
7608 case CallingConv::Fast:
7610 break;
7611 case CallingConv::GHC:
7612 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7613 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7615 "GHC calling convention requires the F and D extensions");
7616 }
7617
7618 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7619 MVT GRLenVT = Subtarget.getGRLenVT();
7620 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7621   // Used with varargs to accumulate store chains.
7622 std::vector<SDValue> OutChains;
7623
7624 // Assign locations to all of the incoming arguments.
7625   SmallVector<CCValAssign> ArgLocs;
7626   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7627
7628 if (CallConv == CallingConv::GHC)
7629     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
7630   else
7631 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7632
7633 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7634 CCValAssign &VA = ArgLocs[i];
7635 SDValue ArgValue;
7636 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7637 // case.
7638 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7639 assert(VA.needsCustom());
7640 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7641 } else if (VA.isRegLoc())
7642 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7643 else
7644 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7645 if (VA.getLocInfo() == CCValAssign::Indirect) {
7646 // If the original argument was split and passed by reference, we need to
7647 // load all parts of it here (using the same address).
7648       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7649                                    MachinePointerInfo()));
7650 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7651 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7652 assert(ArgPartOffset == 0);
7653 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7654 CCValAssign &PartVA = ArgLocs[i + 1];
7655 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7656 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7657 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7658         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
7659                                      MachinePointerInfo()));
7660 ++i;
7661 ++InsIdx;
7662 }
7663 continue;
7664 }
7665 InVals.push_back(ArgValue);
7666 }
7667
7668 if (IsVarArg) {
7669     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
7670     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7671 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7672 MachineFrameInfo &MFI = MF.getFrameInfo();
7673 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7674 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7675
7676 // Offset of the first variable argument from stack pointer, and size of
7677 // the vararg save area. For now, the varargs save area is either zero or
7678 // large enough to hold a0-a7.
7679 int VaArgOffset, VarArgsSaveSize;
7680
7681 // If all registers are allocated, then all varargs must be passed on the
7682 // stack and we don't need to save any argregs.
7683 if (ArgRegs.size() == Idx) {
7684 VaArgOffset = CCInfo.getStackSize();
7685 VarArgsSaveSize = 0;
7686 } else {
7687 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7688 VaArgOffset = -VarArgsSaveSize;
7689 }
7690
7691 // Record the frame index of the first variable argument
7692 // which is a value necessary to VASTART.
7693 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7694 LoongArchFI->setVarArgsFrameIndex(FI);
7695
7696 // If saving an odd number of registers then create an extra stack slot to
7697 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7698     // offsets to even-numbered registers remain 2*GRLen-aligned.
7699 if (Idx % 2) {
7700 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7701 true);
7702 VarArgsSaveSize += GRLenInBytes;
7703 }
7704
7705 // Copy the integer registers that may have been used for passing varargs
7706 // to the vararg save area.
7707 for (unsigned I = Idx; I < ArgRegs.size();
7708 ++I, VaArgOffset += GRLenInBytes) {
7709 const Register Reg = RegInfo.createVirtualRegister(RC);
7710 RegInfo.addLiveIn(ArgRegs[I], Reg);
7711 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7712 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7713 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7714       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7715                                    MachinePointerInfo::getFixedStack(MF, FI));
7716 cast<StoreSDNode>(Store.getNode())
7717 ->getMemOperand()
7718 ->setValue((Value *)nullptr);
7719 OutChains.push_back(Store);
7720 }
7721 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7722 }
7723
7724 // All stores are grouped in one node to allow the matching between
7725 // the size of Ins and InVals. This only happens for vararg functions.
7726 if (!OutChains.empty()) {
7727 OutChains.push_back(Chain);
7728 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7729 }
7730
7731 return Chain;
7732}
7733
7734 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
7735   return CI->isTailCall();
7736}
7737
7738// Check if the return value is used as only a return value, as otherwise
7739// we can't perform a tail-call.
7740 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
7741                                                  SDValue &Chain) const {
7742 if (N->getNumValues() != 1)
7743 return false;
7744 if (!N->hasNUsesOfValue(1, 0))
7745 return false;
7746
7747 SDNode *Copy = *N->user_begin();
7748 if (Copy->getOpcode() != ISD::CopyToReg)
7749 return false;
7750
7751 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7752 // isn't safe to perform a tail call.
7753 if (Copy->getGluedNode())
7754 return false;
7755
7756 // The copy must be used by a LoongArchISD::RET, and nothing else.
7757 bool HasRet = false;
7758 for (SDNode *Node : Copy->users()) {
7759 if (Node->getOpcode() != LoongArchISD::RET)
7760 return false;
7761 HasRet = true;
7762 }
7763
7764 if (!HasRet)
7765 return false;
7766
7767 Chain = Copy->getOperand(0);
7768 return true;
7769}
7770
7771// Check whether the call is eligible for tail call optimization.
7772bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7773 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7774 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7775
7776 auto CalleeCC = CLI.CallConv;
7777 auto &Outs = CLI.Outs;
7778 auto &Caller = MF.getFunction();
7779 auto CallerCC = Caller.getCallingConv();
7780
7781 // Do not tail call opt if the stack is used to pass parameters.
7782 if (CCInfo.getStackSize() != 0)
7783 return false;
7784
7785 // Do not tail call opt if any parameters need to be passed indirectly.
7786 for (auto &VA : ArgLocs)
7787 if (VA.getLocInfo() == CCValAssign::Indirect)
7788 return false;
7789
7790 // Do not tail call opt if either caller or callee uses struct return
7791 // semantics.
7792 auto IsCallerStructRet = Caller.hasStructRetAttr();
7793 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7794 if (IsCallerStructRet || IsCalleeStructRet)
7795 return false;
7796
7797 // Do not tail call opt if either the callee or caller has a byval argument.
7798 for (auto &Arg : Outs)
7799 if (Arg.Flags.isByVal())
7800 return false;
7801
7802 // The callee has to preserve all registers the caller needs to preserve.
7803 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7804 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7805 if (CalleeCC != CallerCC) {
7806 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7807 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7808 return false;
7809 }
7810 return true;
7811}
7812
7813 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7814   return DAG.getDataLayout().getPrefTypeAlign(
7815 VT.getTypeForEVT(*DAG.getContext()));
7816}
7817
7818// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7819// and output parameter nodes.
7820SDValue
7821 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
7822                                    SmallVectorImpl<SDValue> &InVals) const {
7823 SelectionDAG &DAG = CLI.DAG;
7824 SDLoc &DL = CLI.DL;
7825   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
7826   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7827   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
7828   SDValue Chain = CLI.Chain;
7829 SDValue Callee = CLI.Callee;
7830 CallingConv::ID CallConv = CLI.CallConv;
7831 bool IsVarArg = CLI.IsVarArg;
7832 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7833 MVT GRLenVT = Subtarget.getGRLenVT();
7834 bool &IsTailCall = CLI.IsTailCall;
7835
7836   MachineFunction &MF = DAG.getMachineFunction();
7837
7838 // Analyze the operands of the call, assigning locations to each operand.
7839   SmallVector<CCValAssign> ArgLocs;
7840   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7841
7842 if (CallConv == CallingConv::GHC)
7843 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
7844 else
7845 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
7846
7847 // Check if it's really possible to do a tail call.
7848 if (IsTailCall)
7849 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7850
7851 if (IsTailCall)
7852 ++NumTailCalls;
7853 else if (CLI.CB && CLI.CB->isMustTailCall())
7854 report_fatal_error("failed to perform tail call elimination on a call "
7855 "site marked musttail");
7856
7857 // Get a count of how many bytes are to be pushed on the stack.
7858 unsigned NumBytes = ArgCCInfo.getStackSize();
7859
7860 // Create local copies for byval args.
7861 SmallVector<SDValue> ByValArgs;
7862 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7863 ISD::ArgFlagsTy Flags = Outs[i].Flags;
7864 if (!Flags.isByVal())
7865 continue;
7866
7867 SDValue Arg = OutVals[i];
7868 unsigned Size = Flags.getByValSize();
7869 Align Alignment = Flags.getNonZeroByValAlign();
7870
7871 int FI =
7872 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7873 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7874 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
7875
7876 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7877 /*IsVolatile=*/false,
7878 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
7879                           MachinePointerInfo(), MachinePointerInfo());
7880     ByValArgs.push_back(FIPtr);
7881 }
7882
7883 if (!IsTailCall)
7884 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7885
7886 // Copy argument values to their designated locations.
7887   SmallVector<std::pair<Register, SDValue>> RegsToPass;
7888   SmallVector<SDValue> MemOpChains;
7889 SDValue StackPtr;
7890 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
7891 ++i, ++OutIdx) {
7892 CCValAssign &VA = ArgLocs[i];
7893 SDValue ArgValue = OutVals[OutIdx];
7894 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
7895
7896 // Handle passing f64 on LA32D with a soft float ABI as a special case.
7897 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7898 assert(VA.isRegLoc() && "Expected register VA assignment");
7899 assert(VA.needsCustom());
7900 SDValue SplitF64 =
7901           DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
7902                       DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
7903 SDValue Lo = SplitF64.getValue(0);
7904 SDValue Hi = SplitF64.getValue(1);
7905
7906 Register RegLo = VA.getLocReg();
7907 RegsToPass.push_back(std::make_pair(RegLo, Lo));
7908
7909 // Get the CCValAssign for the Hi part.
7910 CCValAssign &HiVA = ArgLocs[++i];
7911
7912 if (HiVA.isMemLoc()) {
7913 // Second half of f64 is passed on the stack.
7914 if (!StackPtr.getNode())
7915 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7916         SDValue Address =
7917             DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7918 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
7919 // Emit the store.
7920 MemOpChains.push_back(DAG.getStore(
7921 Chain, DL, Hi, Address,
7922             MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
7923       } else {
7924 // Second half of f64 is passed in another GPR.
7925 Register RegHigh = HiVA.getLocReg();
7926 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
7927 }
7928 continue;
7929 }
7930
7931 // Promote the value if needed.
7932 // For now, only handle fully promoted and indirect arguments.
7933 if (VA.getLocInfo() == CCValAssign::Indirect) {
7934 // Store the argument in a stack slot and pass its address.
7935 Align StackAlign =
7936 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
7937 getPrefTypeAlign(ArgValue.getValueType(), DAG));
7938 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
7939 // If the original argument was split and passed by reference, we need to
7940 // store the required parts of it here (and pass just one address).
7941 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
7942 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
7943 assert(ArgPartOffset == 0);
7944 // Calculate the total size to store. We don't have access to what we're
7945 // actually storing other than performing the loop and collecting the
7946 // info.
7947       SmallVector<std::pair<SDValue, SDValue>> Parts;
7948       while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
7949 SDValue PartValue = OutVals[OutIdx + 1];
7950 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
7951 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7952 EVT PartVT = PartValue.getValueType();
7953
7954 StoredSize += PartVT.getStoreSize();
7955 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
7956 Parts.push_back(std::make_pair(PartValue, Offset));
7957 ++i;
7958 ++OutIdx;
7959 }
7960 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
7961 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
7962 MemOpChains.push_back(
7963           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
7964                        MachinePointerInfo::getFixedStack(MF, FI)));
7965 for (const auto &Part : Parts) {
7966 SDValue PartValue = Part.first;
7967 SDValue PartOffset = Part.second;
7968         SDValue Address =
7969             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
7970 MemOpChains.push_back(
7971             DAG.getStore(Chain, DL, PartValue, Address,
7972                          MachinePointerInfo::getFixedStack(MF, FI)));
7973 }
7974 ArgValue = SpillSlot;
7975 } else {
7976 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
7977 }
7978
7979 // Use local copy if it is a byval arg.
7980 if (Flags.isByVal())
7981 ArgValue = ByValArgs[j++];
7982
7983 if (VA.isRegLoc()) {
7984 // Queue up the argument copies and emit them at the end.
7985 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
7986 } else {
7987 assert(VA.isMemLoc() && "Argument not register or memory");
7988 assert(!IsTailCall && "Tail call not allowed if stack is used "
7989 "for passing parameters");
7990
7991 // Work out the address of the stack slot.
7992 if (!StackPtr.getNode())
7993 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7994       SDValue Address =
7995           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7996                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
7997
7998 // Emit the store.
7999 MemOpChains.push_back(
8000 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8001 }
8002 }
8003
8004 // Join the stores, which are independent of one another.
8005 if (!MemOpChains.empty())
8006 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8007
8008 SDValue Glue;
8009
8010 // Build a sequence of copy-to-reg nodes, chained and glued together.
8011 for (auto &Reg : RegsToPass) {
8012 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8013 Glue = Chain.getValue(1);
8014 }
8015
8016 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8017 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8018 // split it and then direct call can be matched by PseudoCALL.
8019   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
8020     const GlobalValue *GV = S->getGlobal();
8021 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8022                            ? LoongArchII::MO_CALL
8023                            : LoongArchII::MO_CALL_PLT;
8024     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8025 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8026 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8027                            ? LoongArchII::MO_CALL
8028                            : LoongArchII::MO_CALL_PLT;
8029     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8030 }
8031
8032 // The first call operand is the chain and the second is the target address.
8033   SmallVector<SDValue, 8> Ops;
8034   Ops.push_back(Chain);
8035 Ops.push_back(Callee);
8036
8037 // Add argument registers to the end of the list so that they are
8038 // known live into the call.
8039 for (auto &Reg : RegsToPass)
8040 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8041
8042 if (!IsTailCall) {
8043 // Add a register mask operand representing the call-preserved registers.
8044 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8045 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8046 assert(Mask && "Missing call preserved mask for calling convention");
8047 Ops.push_back(DAG.getRegisterMask(Mask));
8048 }
8049
8050 // Glue the call to the argument copies, if any.
8051 if (Glue.getNode())
8052 Ops.push_back(Glue);
8053
8054 // Emit the call.
8055 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8056 unsigned Op;
8057 switch (DAG.getTarget().getCodeModel()) {
8058 default:
8059 report_fatal_error("Unsupported code model");
8060 case CodeModel::Small:
8061 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8062 break;
8063 case CodeModel::Medium:
8064 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8065     Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8066     break;
8067 case CodeModel::Large:
8068 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8069     Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8070     break;
8071 }
8072
8073 if (IsTailCall) {
8074     MF.getFrameInfo().setHasTailCall();
8075     SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8076 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8077 return Ret;
8078 }
8079
8080 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8081 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8082 Glue = Chain.getValue(1);
8083
8084 // Mark the end of the call, which is glued to the call itself.
8085 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8086 Glue = Chain.getValue(1);
8087
8088 // Assign locations to each value returned by this call.
8089 SmallVector<CCValAssign> RVLocs;
8090 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8091 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8092
8093 // Copy all of the result registers out of their specified physreg.
8094 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8095 auto &VA = RVLocs[i];
8096 // Copy the value out.
8097 SDValue RetValue =
8098 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8099 // Glue the RetValue to the end of the call sequence.
8100 Chain = RetValue.getValue(1);
8101 Glue = RetValue.getValue(2);
8102
8103 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8104 assert(VA.needsCustom());
8105 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8106 MVT::i32, Glue);
8107 Chain = RetValue2.getValue(1);
8108 Glue = RetValue2.getValue(2);
8109 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8110 RetValue, RetValue2);
8111 } else
8112 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8113
8114 InVals.push_back(RetValue);
8115 }
8116
8117 return Chain;
8118}
8119
8120 bool LoongArchTargetLowering::CanLowerReturn(
8121 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8122 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8123 const Type *RetTy) const {
8124 SmallVector<CCValAssign> RVLocs;
8125 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8126
8127 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8128 LoongArchABI::ABI ABI =
8129 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8130 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8131 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8132 return false;
8133 }
8134 return true;
8135}
8136
8137 SDValue LoongArchTargetLowering::LowerReturn(
8138 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8139 const SmallVectorImpl<ISD::OutputArg> &Outs,
8140 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8141 SelectionDAG &DAG) const {
8142 // Stores the assignment of the return value to a location.
8143 SmallVector<CCValAssign> RVLocs;
8144
8145 // Info about the registers and stack slot.
8146 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8147 *DAG.getContext());
8148
8149 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8150 nullptr, CC_LoongArch);
8151 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8152 report_fatal_error("GHC functions return void only");
8153 SDValue Glue;
8154 SmallVector<SDValue, 4> RetOps(1, Chain);
8155
8156 // Copy the result values into the output registers.
8157 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8158 SDValue Val = OutVals[OutIdx];
8159 CCValAssign &VA = RVLocs[i];
8160 assert(VA.isRegLoc() && "Can only return in registers!");
8161
8162 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8163 // Handle returning f64 on LA32D with a soft float ABI.
8164 assert(VA.isRegLoc() && "Expected return via registers");
8165 assert(VA.needsCustom());
8166 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8167 DAG.getVTList(MVT::i32, MVT::i32), Val);
8168 SDValue Lo = SplitF64.getValue(0);
8169 SDValue Hi = SplitF64.getValue(1);
8170 Register RegLo = VA.getLocReg();
8171 Register RegHi = RVLocs[++i].getLocReg();
8172
8173 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8174 Glue = Chain.getValue(1);
8175 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8176 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8177 Glue = Chain.getValue(1);
8178 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8179 } else {
8180 // Handle a 'normal' return.
8181 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8182 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8183
8184 // Guarantee that all emitted copies are stuck together.
8185 Glue = Chain.getValue(1);
8186 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8187 }
8188 }
8189
8190 RetOps[0] = Chain; // Update chain.
8191
8192 // Add the glue node if we have it.
8193 if (Glue.getNode())
8194 RetOps.push_back(Glue);
8195
8196 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8197}
8198
8199 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
8200 EVT VT) const {
8201 if (!Subtarget.hasExtLSX())
8202 return false;
8203
8204 if (VT == MVT::f32) {
8205 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8206 return (masked == 0x3e000000 || masked == 0x40000000);
8207 }
8208
8209 if (VT == MVT::f64) {
8210 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8211 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8212 }
8213
8214 return false;
8215}
8216
8217bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8218 bool ForCodeSize) const {
8219 // TODO: Maybe need more checks here after vector extension is supported.
8220 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8221 return false;
8222 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8223 return false;
8224 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8225}
8226
8227 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
8228 return true;
8229}
8230
8231 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
8232 return true;
8233}
8234
8235bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8236 const Instruction *I) const {
8237 if (!Subtarget.is64Bit())
8238 return isa<LoadInst>(I) || isa<StoreInst>(I);
8239
8240 if (isa<LoadInst>(I))
8241 return true;
8242
8243 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8244 // require fences because we can use amswap_db.[w/d].
8245 Type *Ty = I->getOperand(0)->getType();
8246 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8247 unsigned Size = Ty->getIntegerBitWidth();
8248 return (Size == 8 || Size == 16);
8249 }
8250
8251 return false;
8252}
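// Illustrative consequence of the hook above (editorial note, not new behavior):
// on LA64 an i32/i64 atomic store can be selected as amswap_db.w/amswap_db.d, so
// no separate fence is inserted for it, while i8/i16 atomic stores and all atomic
// loads keep the conservative fences.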
8253
8254 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
8255 LLVMContext &Context,
8256 EVT VT) const {
8257 if (!VT.isVector())
8258 return getPointerTy(DL);
8259 return VT.changeVectorElementTypeToInteger();
8260}
8261
8262 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
8263 // TODO: Support vectors.
8264 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
8265}
8266
8267 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
8268 const CallInst &I,
8269 MachineFunction &MF,
8270 unsigned Intrinsic) const {
8271 switch (Intrinsic) {
8272 default:
8273 return false;
8274 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8275 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8276 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8277 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8278 Info.opc = ISD::INTRINSIC_W_CHAIN;
8279 Info.memVT = MVT::i32;
8280 Info.ptrVal = I.getArgOperand(0);
8281 Info.offset = 0;
8282 Info.align = Align(4);
8283 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
8284 MachineMemOperand::MOVolatile;
8285 return true;
8286 // TODO: Add more Intrinsics later.
8287 }
8288}
8289
8290// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
8291// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
8292// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
8293// regression, we need to implement it manually.
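// Rough sketch of the manual expansion below for an i8 "atomicrmw and"
// (illustrative only): the pointer is rounded down to a 4-byte boundary via
// llvm.ptrmask, ShiftAmt = (addr & 3) * 8 selects the byte within the word,
// Mask = 0xff << ShiftAmt covers it, and the operand is widened to i32, shifted
// into place and OR'ed with ~Mask so the untouched bytes keep their value; the
// i32 atomicrmw result is then shifted back down and truncated to i8.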
8294 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
8295 AtomicRMWInst::BinOp Op = AI->getOperation();
8296
8297 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
8298 Op == AtomicRMWInst::And) &&
8299 "Unable to expand");
8300 unsigned MinWordSize = 4;
8301
8302 IRBuilder<> Builder(AI);
8303 LLVMContext &Ctx = Builder.getContext();
8304 const DataLayout &DL = AI->getDataLayout();
8305 Type *ValueType = AI->getType();
8306 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8307
8308 Value *Addr = AI->getPointerOperand();
8309 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8310 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8311
8312 Value *AlignedAddr = Builder.CreateIntrinsic(
8313 Intrinsic::ptrmask, {PtrTy, IntTy},
8314 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8315 "AlignedAddr");
8316
8317 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8318 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8319 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8320 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8321 Value *Mask = Builder.CreateShl(
8322 ConstantInt::get(WordType,
8323 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8324 ShiftAmt, "Mask");
8325 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8326 Value *ValOperand_Shifted =
8327 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8328 ShiftAmt, "ValOperand_Shifted");
8329 Value *NewOperand;
8330 if (Op == AtomicRMWInst::And)
8331 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8332 else
8333 NewOperand = ValOperand_Shifted;
8334
8335 AtomicRMWInst *NewAI =
8336 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8337 AI->getOrdering(), AI->getSyncScopeID());
8338
8339 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8340 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8341 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8342 AI->replaceAllUsesWith(FinalOldResult);
8343 AI->eraseFromParent();
8344}
8345
8346 TargetLowering::AtomicExpansionKind
8347 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
8348 // TODO: Add more AtomicRMWInst that needs to be extended.
8349
8350 // Since floating-point operation requires a non-trivial set of data
8351 // operations, use CmpXChg to expand.
8352 if (AI->isFloatingPointOperation() ||
8353 AI->getOperation() == AtomicRMWInst::UIncWrap ||
8354 AI->getOperation() == AtomicRMWInst::UDecWrap ||
8355 AI->getOperation() == AtomicRMWInst::USubCond ||
8356 AI->getOperation() == AtomicRMWInst::USubSat)
8357 return AtomicExpansionKind::CmpXChg;
8358
8359 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8360 (AI->getOperation() == AtomicRMWInst::Xchg ||
8361 AI->getOperation() == AtomicRMWInst::Add ||
8362 AI->getOperation() == AtomicRMWInst::Sub)) {
8363 return AtomicExpansionKind::None;
8364 }
8365
8366 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8367 if (Subtarget.hasLAMCAS()) {
8368 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8369 AI->getOperation() == AtomicRMWInst::Or ||
8370 AI->getOperation() == AtomicRMWInst::Xor))
8371 return AtomicExpansionKind::Expand;
8372 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8373 return AtomicExpansionKind::CmpXChg;
8374 }
8375
8376 if (Size == 8 || Size == 16)
8377 return AtomicExpansionKind::MaskedIntrinsic;
8378 return AtomicExpansionKind::None;
8379}
8380
8381static Intrinsic::ID
8382 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
8383 AtomicRMWInst::BinOp BinOp) {
8384 if (GRLen == 64) {
8385 switch (BinOp) {
8386 default:
8387 llvm_unreachable("Unexpected AtomicRMW BinOp");
8388 case AtomicRMWInst::Xchg:
8389 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8390 case AtomicRMWInst::Add:
8391 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8392 case AtomicRMWInst::Sub:
8393 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8394 case AtomicRMWInst::Nand:
8395 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8396 case AtomicRMWInst::UMax:
8397 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8398 case AtomicRMWInst::UMin:
8399 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8400 case AtomicRMWInst::Max:
8401 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8402 case AtomicRMWInst::Min:
8403 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8404 // TODO: support other AtomicRMWInst.
8405 }
8406 }
8407
8408 if (GRLen == 32) {
8409 switch (BinOp) {
8410 default:
8411 llvm_unreachable("Unexpected AtomicRMW BinOp");
8412 case AtomicRMWInst::Xchg:
8413 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8414 case AtomicRMWInst::Add:
8415 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8416 case AtomicRMWInst::Sub:
8417 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8418 case AtomicRMWInst::Nand:
8419 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8420 case AtomicRMWInst::UMax:
8421 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8422 case AtomicRMWInst::UMin:
8423 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8424 case AtomicRMWInst::Max:
8425 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8426 case AtomicRMWInst::Min:
8427 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8428 // TODO: support other AtomicRMWInst.
8429 }
8430 }
8431
8432 llvm_unreachable("Unexpected GRLen\n");
8433}
8434
8435 TargetLowering::AtomicExpansionKind
8436 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
8437 AtomicCmpXchgInst *CI) const {
8438
8439 if (Subtarget.hasLAMCAS())
8440 return AtomicExpansionKind::None;
8441
8442 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
8443 if (Size == 8 || Size == 16)
8444 return AtomicExpansionKind::MaskedIntrinsic;
8445 return AtomicExpansionKind::None;
8446}
8447
8448 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
8449 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8450 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8451 unsigned GRLen = Subtarget.getGRLen();
8452 AtomicOrdering FailOrd = CI->getFailureOrdering();
8453 Value *FailureOrdering =
8454 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8455 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8456 if (GRLen == 64) {
8457 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8458 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8459 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8460 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8461 }
8462 Type *Tys[] = {AlignedAddr->getType()};
8463 Value *Result = Builder.CreateIntrinsic(
8464 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8465 if (GRLen == 64)
8466 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8467 return Result;
8468}
8469
8470 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
8471 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8472 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8473 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8474 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8475 // mask, as this produces better code than the LL/SC loop emitted by
8476 // int_loongarch_masked_atomicrmw_xchg.
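// For example, "atomicrmw xchg ptr %p, i8 0" becomes an AtomicRMWInst::And with
// ~Mask (clearing only the addressed byte), and xchg with -1 becomes an
// AtomicRMWInst::Or with Mask, avoiding the masked-xchg LL/SC loop entirely.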
8477 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8478 isa<ConstantInt>(AI->getValOperand())) {
8479 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
8480 if (CVal->isZero())
8481 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8482 Builder.CreateNot(Mask, "Inv_Mask"),
8483 AI->getAlign(), Ord);
8484 if (CVal->isMinusOne())
8485 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8486 AI->getAlign(), Ord);
8487 }
8488
8489 unsigned GRLen = Subtarget.getGRLen();
8490 Value *Ordering =
8491 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8492 Type *Tys[] = {AlignedAddr->getType()};
8493 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
8494 AI->getModule(),
8495 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
8496
8497 if (GRLen == 64) {
8498 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8499 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8500 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8501 }
8502
8503 Value *Result;
8504
8505 // Must pass the shift amount needed to sign extend the loaded value prior
8506 // to performing a signed comparison for min/max. ShiftAmt is the number of
8507 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8508 // is the number of bits to left+right shift the value in order to
8509 // sign-extend.
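// Worked example (illustrative): with GRLen = 64, an i8 value at byte offset 1
// gives ShiftAmt = 8 and ValWidth = 8, so SextShamt = (64 - 8) - 8 = 48; the
// loaded word is shifted left by 48 and arithmetically right by 48 to
// sign-extend the field before the signed min/max comparison.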
8510 if (AI->getOperation() == AtomicRMWInst::Min ||
8511 AI->getOperation() == AtomicRMWInst::Max) {
8512 const DataLayout &DL = AI->getDataLayout();
8513 unsigned ValWidth =
8514 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8515 Value *SextShamt =
8516 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8517 Result = Builder.CreateCall(LlwOpScwLoop,
8518 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8519 } else {
8520 Result =
8521 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8522 }
8523
8524 if (GRLen == 64)
8525 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8526 return Result;
8527}
8528
8529 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
8530 const MachineFunction &MF, EVT VT) const {
8531 VT = VT.getScalarType();
8532
8533 if (!VT.isSimple())
8534 return false;
8535
8536 switch (VT.getSimpleVT().SimpleTy) {
8537 case MVT::f32:
8538 case MVT::f64:
8539 return true;
8540 default:
8541 break;
8542 }
8543
8544 return false;
8545}
8546
8547 Register LoongArchTargetLowering::getExceptionPointerRegister(
8548 const Constant *PersonalityFn) const {
8549 return LoongArch::R4;
8550}
8551
8552 Register LoongArchTargetLowering::getExceptionSelectorRegister(
8553 const Constant *PersonalityFn) const {
8554 return LoongArch::R5;
8555}
8556
8557//===----------------------------------------------------------------------===//
8558// Target Optimization Hooks
8559//===----------------------------------------------------------------------===//
8560
8561 static int getEstimateRefinementSteps(EVT VT,
8562 const LoongArchSubtarget &Subtarget) {
8563 // The FRECIPE feature's estimate instructions have a relative accuracy of 2^-14.
8564 // IEEE single precision has 23 significand bits and double precision has 52.
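// One Newton-Raphson step roughly squares the relative error (2^-14 -> ~2^-28),
// which already exceeds the 23-bit f32 significand; two steps (~2^-56) cover the
// 52-bit f64 significand, hence 1 and 2 refinement steps below.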
8565 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8566 return RefinementSteps;
8567}
8568
8569 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
8570 SelectionDAG &DAG, int Enabled,
8571 int &RefinementSteps,
8572 bool &UseOneConstNR,
8573 bool Reciprocal) const {
8574 if (Subtarget.hasFrecipe()) {
8575 SDLoc DL(Operand);
8576 EVT VT = Operand.getValueType();
8577
8578 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8579 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8580 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8581 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8582 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8583
8584 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8585 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8586
8587 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8588 if (Reciprocal)
8589 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8590
8591 return Estimate;
8592 }
8593 }
8594
8595 return SDValue();
8596}
8597
8598 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
8599 SelectionDAG &DAG,
8600 int Enabled,
8601 int &RefinementSteps) const {
8602 if (Subtarget.hasFrecipe()) {
8603 SDLoc DL(Operand);
8604 EVT VT = Operand.getValueType();
8605
8606 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8607 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8608 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8609 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8610 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8611
8612 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8613 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8614
8615 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8616 }
8617 }
8618
8619 return SDValue();
8620}
8621
8622//===----------------------------------------------------------------------===//
8623// LoongArch Inline Assembly Support
8624//===----------------------------------------------------------------------===//
8625
8626 LoongArchTargetLowering::ConstraintType
8627 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8628 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8629 //
8630 // 'f': A floating-point register (if available).
8631 // 'k': A memory operand whose address is formed by a base register and
8632 // (optionally scaled) index register.
8633 // 'l': A signed 16-bit constant.
8634 // 'm': A memory operand whose address is formed by a base register and
8635 // offset that is suitable for use in instructions with the same
8636 // addressing mode as st.w and ld.w.
8637 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8638 // instruction)
8639 // 'I': A signed 12-bit constant (for arithmetic instructions).
8640 // 'J': Integer zero.
8641 // 'K': An unsigned 12-bit constant (for logic instructions).
8642 // "ZB": An address that is held in a general-purpose register. The offset is
8643 // zero.
8644 // "ZC": A memory operand whose address is formed by a base register and
8645 // offset that is suitable for use in instructions with the same
8646 // addressing mode as ll.w and sc.w.
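// Illustrative mapping (informal, editorial): "I" accepts addi.w/addi.d-style
// immediates in [-2048, 2047], "K" accepts andi/ori/xori immediates in
// [0, 4095], and "ZB" describes the zero-offset addresses used by the
// am*.{w,d} atomic instructions.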
8647 if (Constraint.size() == 1) {
8648 switch (Constraint[0]) {
8649 default:
8650 break;
8651 case 'f':
8652 case 'q':
8653 return C_RegisterClass;
8654 case 'l':
8655 case 'I':
8656 case 'J':
8657 case 'K':
8658 return C_Immediate;
8659 case 'k':
8660 return C_Memory;
8661 }
8662 }
8663
8664 if (Constraint == "ZC" || Constraint == "ZB")
8665 return C_Memory;
8666
8667 // 'm' is handled here.
8668 return TargetLowering::getConstraintType(Constraint);
8669}
8670
8671InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8672 StringRef ConstraintCode) const {
8673 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8674 .Case("k", InlineAsm::ConstraintCode::k)
8675 .Case("ZB", InlineAsm::ConstraintCode::ZB)
8676 .Case("ZC", InlineAsm::ConstraintCode::ZC)
8677 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8678}
8679
8680std::pair<unsigned, const TargetRegisterClass *>
8681LoongArchTargetLowering::getRegForInlineAsmConstraint(
8682 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8683 // First, see if this is a constraint that directly corresponds to a LoongArch
8684 // register class.
8685 if (Constraint.size() == 1) {
8686 switch (Constraint[0]) {
8687 case 'r':
8688 // TODO: Support fixed vectors up to GRLen?
8689 if (VT.isVector())
8690 break;
8691 return std::make_pair(0U, &LoongArch::GPRRegClass);
8692 case 'q':
8693 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8694 case 'f':
8695 if (Subtarget.hasBasicF() && VT == MVT::f32)
8696 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8697 if (Subtarget.hasBasicD() && VT == MVT::f64)
8698 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8699 if (Subtarget.hasExtLSX() &&
8700 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8701 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8702 if (Subtarget.hasExtLASX() &&
8703 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8704 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8705 break;
8706 default:
8707 break;
8708 }
8709 }
8710
8711 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8712 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8713 // constraints while the official register name is prefixed with a '$'. So we
8714 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8715 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
8716 // case insensitive, so no need to convert the constraint to upper case here.
8717 //
8718 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8719 // decode the usage of register name aliases into their official names. And
8720 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8721 // official register names.
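// For example, a register operand written as "{$r4}" is reduced to "{r4}" here
// so the generic matcher can resolve it by its TableGen record name.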
8722 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8723 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8724 bool IsFP = Constraint[2] == 'f';
8725 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8726 std::pair<unsigned, const TargetRegisterClass *> R;
8727 R = TargetLowering::getRegForInlineAsmConstraint(
8728 TRI, join_items("", Temp.first, Temp.second), VT);
8729 // Match those names to the widest floating point register type available.
8730 if (IsFP) {
8731 unsigned RegNo = R.first;
8732 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8733 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8734 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8735 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8736 }
8737 }
8738 }
8739 return R;
8740 }
8741
8742 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8743}
8744
8745void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8746 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8747 SelectionDAG &DAG) const {
8748 // Currently only support length 1 constraints.
8749 if (Constraint.size() == 1) {
8750 switch (Constraint[0]) {
8751 case 'l':
8752 // Validate & create a 16-bit signed immediate operand.
8753 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8754 uint64_t CVal = C->getSExtValue();
8755 if (isInt<16>(CVal))
8756 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8757 Subtarget.getGRLenVT()));
8758 }
8759 return;
8760 case 'I':
8761 // Validate & create a 12-bit signed immediate operand.
8762 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8763 uint64_t CVal = C->getSExtValue();
8764 if (isInt<12>(CVal))
8765 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8766 Subtarget.getGRLenVT()));
8767 }
8768 return;
8769 case 'J':
8770 // Validate & create an integer zero operand.
8771 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8772 if (C->getZExtValue() == 0)
8773 Ops.push_back(
8774 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8775 return;
8776 case 'K':
8777 // Validate & create a 12-bit unsigned immediate operand.
8778 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8779 uint64_t CVal = C->getZExtValue();
8780 if (isUInt<12>(CVal))
8781 Ops.push_back(
8782 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8783 }
8784 return;
8785 default:
8786 break;
8787 }
8788 }
8789 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8790}
8791
8792#define GET_REGISTER_MATCHER
8793#include "LoongArchGenAsmMatcher.inc"
8794
8795 Register
8796 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8797 const MachineFunction &MF) const {
8798 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8799 std::string NewRegName = Name.second.str();
8800 Register Reg = MatchRegisterAltName(NewRegName);
8801 if (!Reg)
8802 Reg = MatchRegisterName(NewRegName);
8803 if (!Reg)
8804 return Reg;
8805 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8806 if (!ReservedRegs.test(Reg))
8807 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8808 StringRef(RegName) + "\"."));
8809 return Reg;
8810}
8811
8812 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8813 EVT VT, SDValue C) const {
8814 // TODO: Support vectors.
8815 if (!VT.isScalarInteger())
8816 return false;
8817
8818 // Omit the optimization if the data size exceeds GRLen.
8819 if (VT.getSizeInBits() > Subtarget.getGRLen())
8820 return false;
8821
8822 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8823 const APInt &Imm = ConstNode->getAPIntValue();
8824 // Break MUL into (SLLI + ADD/SUB) or ALSL.
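// e.g. x * 9 = (x << 3) + x and x * 7 = (x << 3) - x, each a single shift-add
// pattern (alsl.w/d or slli + add/sub), so reporting these as profitable.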
8825 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8826 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8827 return true;
8828 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8829 if (ConstNode->hasOneUse() &&
8830 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8831 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8832 return true;
8833 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8834 // in which the immediate has two set bits. Or Break (MUL x, imm)
8835 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8836 // equals to (1 << s0) - (1 << s1).
8837 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8838 unsigned Shifts = Imm.countr_zero();
8839 // Reject immediates which can be composed via a single LUI.
8840 if (Shifts >= 12)
8841 return false;
8842 // Reject multiplications that can be optimized to
8843 // (SLLI (ALSL x, x, 1/2/3/4), s).
8844 APInt ImmPop = Imm.ashr(Shifts);
8845 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8846 return false;
8847 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8848 // since it needs one more instruction than the other 3 cases.
8849 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8850 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8851 (ImmSmall - Imm).isPowerOf2())
8852 return true;
8853 }
8854 }
8855
8856 return false;
8857}
8858
8859 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8860 const AddrMode &AM,
8861 Type *Ty, unsigned AS,
8862 Instruction *I) const {
8863 // LoongArch has four basic addressing modes:
8864 // 1. reg
8865 // 2. reg + 12-bit signed offset
8866 // 3. reg + 14-bit signed offset left-shifted by 2
8867 // 4. reg1 + reg2
8868 // TODO: Add more checks after support vector extension.
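// Informally, mode 2 above matches ld.w/st.w-style si12 addressing, mode 3
// matches ldptr.w/stptr.w (si14 offsets scaled by 4), and mode 4 matches the
// indexed ldx.w/stx.w forms.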
8869
8870 // No global is ever allowed as a base.
8871 if (AM.BaseGV)
8872 return false;
8873
8874 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8875 // with `UAL` feature.
8876 if (!isInt<12>(AM.BaseOffs) &&
8877 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8878 return false;
8879
8880 switch (AM.Scale) {
8881 case 0:
8882 // "r+i" or just "i", depending on HasBaseReg.
8883 break;
8884 case 1:
8885 // "r+r+i" is not allowed.
8886 if (AM.HasBaseReg && AM.BaseOffs)
8887 return false;
8888 // Otherwise we have "r+r" or "r+i".
8889 break;
8890 case 2:
8891 // "2*r+r" or "2*r+i" is not allowed.
8892 if (AM.HasBaseReg || AM.BaseOffs)
8893 return false;
8894 // Allow "2*r" as "r+r".
8895 break;
8896 default:
8897 return false;
8898 }
8899
8900 return true;
8901}
8902
8903 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8904 return isInt<12>(Imm);
8905}
8906
8907 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8908 return isInt<12>(Imm);
8909}
8910
8911 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
8912 // Zexts are free if they can be combined with a load.
8913 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8914 // poorly with type legalization of compares preferring sext.
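// e.g. an i8 or i16 zero-extending load is a single ld.bu/ld.hu, so a zext of
// such a load costs nothing extra.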
8915 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8916 EVT MemVT = LD->getMemoryVT();
8917 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8918 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8919 LD->getExtensionType() == ISD::ZEXTLOAD))
8920 return true;
8921 }
8922
8923 return TargetLowering::isZExtFree(Val, VT2);
8924}
8925
8926 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
8927 EVT DstVT) const {
8928 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8929}
8930
8931 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
8932 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
8933}
8934
8935 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
8936 // TODO: Support vectors.
8937 if (Y.getValueType().isVector())
8938 return false;
8939
8940 return !isa<ConstantSDNode>(Y);
8941}
8942
8943 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
8944 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
8945 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
8946}
8947
8948 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
8949 Type *Ty, bool IsSigned) const {
8950 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
8951 return true;
8952
8953 return IsSigned;
8954}
8955
8956 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8957 // Return false to suppress the unnecessary extensions if the LibCall
8958 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
8959 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
8960 Type.getSizeInBits() < Subtarget.getGRLen()))
8961 return false;
8962 return true;
8963}
8964
8965 // memcpy and other memory intrinsics typically try to use wider loads/stores
8966// if the source/dest is aligned and the copy size is large enough. We therefore
8967// want to align such objects passed to memory intrinsics.
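// e.g. raising a local buffer that is only passed to memcpy from align 1 to
// align 8 on LA64 lets the expanded copy use 64-bit loads and stores.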
8968 bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
8969 unsigned &MinSize,
8970 Align &PrefAlign) const {
8971 if (!isa<MemIntrinsic>(CI))
8972 return false;
8973
8974 if (Subtarget.is64Bit()) {
8975 MinSize = 8;
8976 PrefAlign = Align(8);
8977 } else {
8978 MinSize = 4;
8979 PrefAlign = Align(4);
8980 }
8981
8982 return true;
8983}
8984
8993
8994bool LoongArchTargetLowering::splitValueIntoRegisterParts(
8995 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
8996 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
8997 bool IsABIRegCopy = CC.has_value();
8998 EVT ValueVT = Val.getValueType();
8999
9000 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9001 PartVT == MVT::f32) {
9002 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9003 // nan, and cast to f32.
9004 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9005 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9006 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9007 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9008 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9009 Parts[0] = Val;
9010 return true;
9011 }
9012
9013 return false;
9014}
9015
9016SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9017 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9018 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9019 bool IsABIRegCopy = CC.has_value();
9020
9021 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9022 PartVT == MVT::f32) {
9023 SDValue Val = Parts[0];
9024
9025 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9026 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9027 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9028 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9029 return Val;
9030 }
9031
9032 return SDValue();
9033}
9034
9035MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9036 CallingConv::ID CC,
9037 EVT VT) const {
9038 // Use f32 to pass f16.
9039 if (VT == MVT::f16 && Subtarget.hasBasicF())
9040 return MVT::f32;
9041
9042 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
9043}
9044
9045unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9046 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9047 // Use f32 to pass f16.
9048 if (VT == MVT::f16 && Subtarget.hasBasicF())
9049 return 1;
9050
9051 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
9052}
9053
9054 bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
9055 SDValue Op, const APInt &OriginalDemandedBits,
9056 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9057 unsigned Depth) const {
9058 EVT VT = Op.getValueType();
9059 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9060 unsigned Opc = Op.getOpcode();
9061 switch (Opc) {
9062 default:
9063 break;
9064 case LoongArchISD::VMSKLTZ:
9065 case LoongArchISD::XVMSKLTZ: {
9066 SDValue Src = Op.getOperand(0);
9067 MVT SrcVT = Src.getSimpleValueType();
9068 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9069 unsigned NumElts = SrcVT.getVectorNumElements();
9070
9071 // If we don't need the sign bits at all just return zero.
9072 if (OriginalDemandedBits.countr_zero() >= NumElts)
9073 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9074
9075 // Only demand the vector elements of the sign bits we need.
9076 APInt KnownUndef, KnownZero;
9077 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9078 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9079 TLO, Depth + 1))
9080 return true;
9081
9082 Known.Zero = KnownZero.zext(BitWidth);
9083 Known.Zero.setHighBits(BitWidth - NumElts);
9084
9085 // [X]VMSKLTZ only uses the MSB from each vector element.
9086 KnownBits KnownSrc;
9087 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9088 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9089 Depth + 1))
9090 return true;
9091
9092 if (KnownSrc.One[SrcBits - 1])
9093 Known.One.setLowBits(NumElts);
9094 else if (KnownSrc.Zero[SrcBits - 1])
9095 Known.Zero.setLowBits(NumElts);
9096
9097 // Attempt to avoid multi-use ops if we don't need anything from it.
9098 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
9099 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9100 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9101 return false;
9102 }
9103 }
9104
9105 return TargetLowering::SimplifyDemandedBitsForTargetNode(
9106 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9107}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:388
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to use 'custom' lowering, and whose defined values are all legal.
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an EH pad.
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (touches memory).
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instructions which can add a register with the immediate without having to materialize the immediate into a register.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of the specified type.
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a landing pad.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fit into the return registers.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
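As a hedged illustration of the two MachineFrameInfo entry points above (the sizes, offsets, and alignments here are placeholders, not values this target necessarily uses), a lowering routine might create a fixed object for an incoming stack argument and an ordinary stack object for a local temporary:

// Sketch only; sizes and alignments are illustrative.
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

static void makeFrameObjects(MachineFunction &MF, int64_t ArgSPOffset) {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Fixed object: models an argument already placed on the stack by the
  // caller at a known offset from the incoming stack pointer.
  int ArgFI = MFI.CreateFixedObject(/*Size=*/8, ArgSPOffset,
                                    /*IsImmutable=*/true);

  // Ordinary stack object: a fresh 16-byte, 16-byte-aligned slot.
  int TmpFI = MFI.CreateStackObject(/*Size=*/16, Align(16),
                                    /*isSpillSlot=*/false);
  (void)ArgFI;
  (void)TmpFI;
}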
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
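A small, hedged example of the getSetCC helper (the result type and condition code are chosen arbitrarily here; a real backend would use whatever getSetCCResultType reports): building a signed "LHS < RHS" comparison that a later select or branch can consume.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Sketch: boolean result type fixed to i1 for illustration only.
static SDValue buildSignedLessThan(SelectionDAG &DAG, const SDLoc &DL,
                                   SDValue LHS, SDValue RHS) {
  return DAG.getSetCC(DL, MVT::i1, LHS, RHS, ISD::SETLT);
}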
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
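The getNOT and getNegative helpers above are thin wrappers over XOR and SUB, so each pair below should build equivalent nodes. A hedged sketch; the value type and operand are placeholders.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

static void notAndNegEquivalents(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, EVT VT) {
  // getNOT(Val) is documented as (XOR Val, -1).
  SDValue NotA = DAG.getNOT(DL, Val, VT);
  SDValue NotB = DAG.getNode(ISD::XOR, DL, VT, Val,
                             DAG.getAllOnesConstant(DL, VT));

  // getNegative(Val) is documented as (SUB 0, Val).
  SDValue NegA = DAG.getNegative(Val, DL, VT);
  SDValue NegB = DAG.getNode(ISD::SUB, DL, VT,
                             DAG.getConstant(0, DL, VT), Val);
  (void)NotA; (void)NotB; (void)NegA; (void)NegB;
}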
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
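getSplatBuildVector is convenient when a vector lowering needs every lane set to the same value. A hedged sketch splatting an integer constant; the v4i32 type is arbitrary and not tied to anything this target does.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Sketch: build <C, C, C, C> as a v4i32 BUILD_VECTOR splat.
static SDValue splatConstant(SelectionDAG &DAG, const SDLoc &DL, uint64_t C) {
  EVT VT = MVT::v4i32;
  SDValue Scalar = DAG.getConstant(C, DL, VT.getVectorElementType());
  return DAG.getSplatBuildVector(VT, DL, Scalar);
}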
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or truncating it.
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider type.
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that is recognized by PrologEpilogInserter.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to make them valid.
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a wider type.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layout.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
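makeLibCall returns the (result, chain) pair described above. A hedged sketch of routing an unsupported f32-to-i32 conversion through the matching RTLIB entry; the method name is hypothetical, the libcall choice is illustrative only, and the code assumes it lives in a TargetLowering-derived class so the protected makeLibCall helper is accessible.

// Sketch; member of a hypothetical TargetLowering subclass.
SDValue lowerFPToSIntViaLibcall(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  RTLIB::Libcall LC =
      RTLIB::getFPTOSINT(Src.getValueType(), Op.getValueType());
  MakeLibCallOptions CallOptions;
  SDValue Ops[] = {Src};
  auto [Result, Chain] = makeLibCall(DAG, LC, Op.getValueType(), Ops,
                                     CallOptions, DL);
  (void)Chain; // FP_TO_SINT produces no chain that needs threading back.
  return Result;
}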
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:130
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
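BuildMI is the standard way custom-inserter and expansion code creates machine instructions inside a MachineBasicBlock. A hedged sketch that inserts a register-to-register copy; it uses the target-independent COPY opcode because naming a concrete LoongArch opcode here would be speculative.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Sketch: insert "DstReg = COPY SrcReg" before MBBI in MBB.
static void emitCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, const TargetInstrInfo &TII,
                     Register DstReg, Register SrcReg) {
  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg, getKillRegState(/*B=*/true));
}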
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
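The isInt/isUInt templates above are what immediate-legality hooks such as isLegalAddImmediate and isLegalICmpImmediate typically reduce to. A hedged sketch; the 12-bit width matches LoongArch's si12 add/compare immediate fields but is used here purely as an illustration, and the helper name is hypothetical.

#include "llvm/Support/MathExtras.h"

// Sketch of a typical immediate-legality check: a value is usable directly
// if it fits in a signed 12-bit field, otherwise it must be materialized
// into a register first.
static bool fitsInSImm12(int64_t Imm) { return llvm::isInt<12>(Imm); }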
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...