1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/IntrinsicsLoongArch.h"
32#include "llvm/Support/Debug.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "loongarch-isel-lowering"
41
42STATISTIC(NumTailCalls, "Number of tail calls");
43
44static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
45 cl::desc("Trap on integer division by zero."),
46 cl::init(false));
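// For example (illustrative invocation), the check can be enabled when running
// llc on a LoongArch target:
//   llc -mtriple=loongarch64 -loongarch-check-zero-division foo.ll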
47
49 const LoongArchSubtarget &STI)
50 : TargetLowering(TM), Subtarget(STI) {
51
52 MVT GRLenVT = Subtarget.getGRLenVT();
53
54 // Set up the register classes.
55
56 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
57 if (Subtarget.hasBasicF())
58 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
59 if (Subtarget.hasBasicD())
60 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
61
62 static const MVT::SimpleValueType LSXVTs[] = {
63 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
64 static const MVT::SimpleValueType LASXVTs[] = {
65 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
66
67 if (Subtarget.hasExtLSX())
68 for (MVT VT : LSXVTs)
69 addRegisterClass(VT, &LoongArch::LSX128RegClass);
70
71 if (Subtarget.hasExtLASX())
72 for (MVT VT : LASXVTs)
73 addRegisterClass(VT, &LoongArch::LASX256RegClass);
74
75 // Set operations for LA32 and LA64.
76
78 MVT::i1, Promote);
79
86
89 GRLenVT, Custom);
90
92
93 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
94 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
95 setOperationAction(ISD::VASTART, MVT::Other, Custom);
96 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
97
98 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
99 setOperationAction(ISD::TRAP, MVT::Other, Legal);
100
104
105 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
106
107 // BITREV/REVB requires the 32S feature.
108 if (STI.has32S()) {
109 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
110 // we know which of sll and revb.2h is faster.
113
114 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
115 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
116 // and i32 could still be byte-swapped relatively cheaply.
118 } else {
126 }
127
128 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
129 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
130 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
134
137
138 // Set operations for LA64 only.
139
140 if (Subtarget.is64Bit()) {
147 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
158
162 Custom);
163 setOperationAction(ISD::LROUND, MVT::i32, Custom);
164 }
165
166 // Set operations for LA32 only.
167
168 if (!Subtarget.is64Bit()) {
174 if (Subtarget.hasBasicD())
175 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
176 }
177
178 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
179
180 static const ISD::CondCode FPCCToExpand[] = {
183
184 // Set operations for 'F' feature.
185
186 if (Subtarget.hasBasicF()) {
187 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
190 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
191 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
192
194 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
196 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
197 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
198 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
199 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
204 setOperationAction(ISD::FSIN, MVT::f32, Expand);
205 setOperationAction(ISD::FCOS, MVT::f32, Expand);
206 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
207 setOperationAction(ISD::FPOW, MVT::f32, Expand);
209 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
210 Subtarget.isSoftFPABI() ? LibCall : Custom);
211 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
212 Subtarget.isSoftFPABI() ? LibCall : Custom);
213 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
214 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
215 Subtarget.isSoftFPABI() ? LibCall : Custom);
216
217 if (Subtarget.is64Bit())
218 setOperationAction(ISD::FRINT, MVT::f32, Legal);
219
220 if (!Subtarget.hasBasicD()) {
222 if (Subtarget.is64Bit()) {
225 }
226 }
227 }
228
229 // Set operations for 'D' feature.
230
231 if (Subtarget.hasBasicD()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
235 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
236 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
237 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
238 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
239
241 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
245 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
246 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
247 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
249 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
251 setOperationAction(ISD::FSIN, MVT::f64, Expand);
252 setOperationAction(ISD::FCOS, MVT::f64, Expand);
253 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
254 setOperationAction(ISD::FPOW, MVT::f64, Expand);
256 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
257 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
258 Subtarget.isSoftFPABI() ? LibCall : Custom);
259 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
260 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
261 Subtarget.isSoftFPABI() ? LibCall : Custom);
262
263 if (Subtarget.is64Bit())
264 setOperationAction(ISD::FRINT, MVT::f64, Legal);
265 }
266
267 // Set operations for 'LSX' feature.
268
269 if (Subtarget.hasExtLSX()) {
271 // Expand all truncating stores and extending loads.
272 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
273 setTruncStoreAction(VT, InnerVT, Expand);
276 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
277 }
278 // By default everything must be expanded. Then we will selectively turn
279 // on ones that can be effectively codegen'd.
280 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
282 }
283
284 for (MVT VT : LSXVTs) {
285 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
286 setOperationAction(ISD::BITCAST, VT, Legal);
288
292
297 }
298 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
301 Legal);
303 VT, Legal);
310 Expand);
318 }
319 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
321 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
323 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
326 }
327 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
331 setOperationAction(ISD::FSQRT, VT, Legal);
332 setOperationAction(ISD::FNEG, VT, Legal);
335 VT, Expand);
337 }
339 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
340 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
341 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
342 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
343
344 for (MVT VT :
345 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
346 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
348 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
349 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
350 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
351 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
352 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
353 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
354 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
355 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
356 }
357 }
358
359 // Set operations for 'LASX' feature.
360
361 if (Subtarget.hasExtLASX()) {
362 for (MVT VT : LASXVTs) {
363 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
364 setOperationAction(ISD::BITCAST, VT, Legal);
366
372
376 }
377 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
380 Legal);
382 VT, Legal);
389 Expand);
397 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
398 }
399 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
401 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
403 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
406 }
407 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
411 setOperationAction(ISD::FSQRT, VT, Legal);
412 setOperationAction(ISD::FNEG, VT, Legal);
415 VT, Expand);
417 }
418 }
419
420 // Set DAG combine for LA32 and LA64.
421
426
427 // Set DAG combine for 'LSX' feature.
428
429 if (Subtarget.hasExtLSX()) {
431 setTargetDAGCombine(ISD::BITCAST);
432 }
433
434 // Set DAG combine for 'LASX' feature.
435
436 if (Subtarget.hasExtLASX())
438
439 // Compute derived properties from the register classes.
440 computeRegisterProperties(Subtarget.getRegisterInfo());
441
443
446
447 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
448
450
451 // Function alignments.
453 // Set preferred alignments.
454 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
455 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
456 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
457
458 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
459 if (Subtarget.hasLAMCAS())
461
462 if (Subtarget.hasSCQ()) {
464 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
465 }
466}
467
469 const GlobalAddressSDNode *GA) const {
470 // In order to maximise the opportunity for common subexpression elimination,
471 // keep a separate ADD node for the global address offset instead of folding
472 // it in the global address node. Later peephole optimisations may choose to
473 // fold it back in when profitable.
474 return false;
475}
476
478 SelectionDAG &DAG) const {
479 switch (Op.getOpcode()) {
480 case ISD::ATOMIC_FENCE:
481 return lowerATOMIC_FENCE(Op, DAG);
483 return lowerEH_DWARF_CFA(Op, DAG);
485 return lowerGlobalAddress(Op, DAG);
487 return lowerGlobalTLSAddress(Op, DAG);
489 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
491 return lowerINTRINSIC_W_CHAIN(Op, DAG);
493 return lowerINTRINSIC_VOID(Op, DAG);
495 return lowerBlockAddress(Op, DAG);
496 case ISD::JumpTable:
497 return lowerJumpTable(Op, DAG);
498 case ISD::SHL_PARTS:
499 return lowerShiftLeftParts(Op, DAG);
500 case ISD::SRA_PARTS:
501 return lowerShiftRightParts(Op, DAG, true);
502 case ISD::SRL_PARTS:
503 return lowerShiftRightParts(Op, DAG, false);
505 return lowerConstantPool(Op, DAG);
506 case ISD::FP_TO_SINT:
507 return lowerFP_TO_SINT(Op, DAG);
508 case ISD::BITCAST:
509 return lowerBITCAST(Op, DAG);
510 case ISD::UINT_TO_FP:
511 return lowerUINT_TO_FP(Op, DAG);
512 case ISD::SINT_TO_FP:
513 return lowerSINT_TO_FP(Op, DAG);
514 case ISD::VASTART:
515 return lowerVASTART(Op, DAG);
516 case ISD::FRAMEADDR:
517 return lowerFRAMEADDR(Op, DAG);
518 case ISD::RETURNADDR:
519 return lowerRETURNADDR(Op, DAG);
521 return lowerWRITE_REGISTER(Op, DAG);
523 return lowerINSERT_VECTOR_ELT(Op, DAG);
525 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
527 return lowerBUILD_VECTOR(Op, DAG);
529 return lowerCONCAT_VECTORS(Op, DAG);
531 return lowerVECTOR_SHUFFLE(Op, DAG);
532 case ISD::BITREVERSE:
533 return lowerBITREVERSE(Op, DAG);
535 return lowerSCALAR_TO_VECTOR(Op, DAG);
536 case ISD::PREFETCH:
537 return lowerPREFETCH(Op, DAG);
538 case ISD::SELECT:
539 return lowerSELECT(Op, DAG);
540 case ISD::BRCOND:
541 return lowerBRCOND(Op, DAG);
542 case ISD::FP_TO_FP16:
543 return lowerFP_TO_FP16(Op, DAG);
544 case ISD::FP16_TO_FP:
545 return lowerFP16_TO_FP(Op, DAG);
546 case ISD::FP_TO_BF16:
547 return lowerFP_TO_BF16(Op, DAG);
548 case ISD::BF16_TO_FP:
549 return lowerBF16_TO_FP(Op, DAG);
550 case ISD::VECREDUCE_ADD:
551 return lowerVECREDUCE_ADD(Op, DAG);
552 case ISD::VECREDUCE_AND:
553 case ISD::VECREDUCE_OR:
554 case ISD::VECREDUCE_XOR:
555 case ISD::VECREDUCE_SMAX:
556 case ISD::VECREDUCE_SMIN:
557 case ISD::VECREDUCE_UMAX:
558 case ISD::VECREDUCE_UMIN:
559 return lowerVECREDUCE(Op, DAG);
560 }
561 return SDValue();
562}
563
564// Lower vecreduce_add using vhaddw instructions.
565// For example:
566// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
567// can be lowered to:
568// VHADDW_D_W vr0, vr0, vr0
569// VHADDW_Q_D vr0, vr0, vr0
570// VPICKVE2GR_D a0, vr0, 0
571// ADDI_W a0, a0, 0
572SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
573 SelectionDAG &DAG) const {
574
575 SDLoc DL(Op);
576 MVT OpVT = Op.getSimpleValueType();
577 SDValue Val = Op.getOperand(0);
578
579 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
580 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
581
582 unsigned LegalVecSize = 128;
583 bool isLASX256Vector =
584 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
585
586 // Widen the operand vector until its type is legal.
587 while (!isTypeLegal(Val.getSimpleValueType())) {
588 Val = DAG.WidenVector(Val, DL);
589 }
590
591 // NumEles determines the iteration count; v4i32 for LSX and v8i32 for
592 // LASX should iterate the same number of times.
593 if (isLASX256Vector) {
594 NumEles /= 2;
595 LegalVecSize = 256;
596 }
597
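// Illustrative walk-through for a v4i32 input on LSX: the first iteration
// pairs the four i32 lanes into two i64 partial sums (vhaddw.d.w), the second
// pairs those into a single i128 sum (vhaddw.q.d); the scalar result is then
// extracted from element 0 below.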
598 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
599 MVT IntTy = MVT::getIntegerVT(EleBits);
600 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
601 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
602 }
603
604 if (isLASX256Vector) {
605 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
606 DAG.getConstant(2, DL, MVT::i64));
607 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
608 }
609
610 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
611 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
612}
613
614// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
615// For example:
616// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
617// can be lowered to:
618// VBSRL_V vr1, vr0, 8
619// VMAX_W vr0, vr1, vr0
620// VBSRL_V vr1, vr0, 4
621// VMAX_W vr0, vr1, vr0
622// VPICKVE2GR_W a0, vr0, 0
623// A 256-bit vector is illegal here and is split into two 128-bit vectors
624// by default, which are then processed by this routine.
625SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
626 SelectionDAG &DAG) const {
627 SDLoc DL(Op);
628
629 MVT OpVT = Op.getSimpleValueType();
630 SDValue Val = Op.getOperand(0);
631
632 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
633 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
634
635 // Widen the operand vector until its type is legal.
636 while (!isTypeLegal(Val.getSimpleValueType())) {
637 Val = DAG.WidenVector(Val, DL);
638 }
639
640 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
641 MVT VecTy = Val.getSimpleValueType();
642
643 for (int i = NumEles; i > 1; i /= 2) {
644 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
645 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
646 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
647 }
648
649 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
650 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
651}
652
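// ISD::PREFETCH operands are (chain, address, rw, locality, cache type); the
// cache-type operand distinguishes data prefetches (1) from instruction
// prefetches (0), which is what operand 4 is checked for below.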
653SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
654 SelectionDAG &DAG) const {
655 unsigned IsData = Op.getConstantOperandVal(4);
656
657 // We don't support non-data prefetch.
658 // Just preserve the chain.
659 if (!IsData)
660 return Op.getOperand(0);
661
662 return Op;
663}
664
665// Return true if Val is equal to (setcc LHS, RHS, CC).
666// Return false if Val is the inverse of (setcc LHS, RHS, CC).
667// Otherwise, return std::nullopt.
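// For example, if Val is (setcc %a, %b, setlt), then matchSetCC(%a, %b, setlt,
// Val) returns true and matchSetCC(%a, %b, setge, Val) returns false, since
// setge is the inverse of setlt for the same operands.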
668static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
669 ISD::CondCode CC, SDValue Val) {
670 assert(Val->getOpcode() == ISD::SETCC);
671 SDValue LHS2 = Val.getOperand(0);
672 SDValue RHS2 = Val.getOperand(1);
673 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
674
675 if (LHS == LHS2 && RHS == RHS2) {
676 if (CC == CC2)
677 return true;
678 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
679 return false;
680 } else if (LHS == RHS2 && RHS == LHS2) {
682 if (CC == CC2)
683 return true;
684 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
685 return false;
686 }
687
688 return std::nullopt;
689}
690
692 const LoongArchSubtarget &Subtarget) {
693 SDValue CondV = N->getOperand(0);
694 SDValue TrueV = N->getOperand(1);
695 SDValue FalseV = N->getOperand(2);
696 MVT VT = N->getSimpleValueType(0);
697 SDLoc DL(N);
698
699 // (select c, -1, y) -> -c | y
700 if (isAllOnesConstant(TrueV)) {
701 SDValue Neg = DAG.getNegative(CondV, DL, VT);
702 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
703 }
704 // (select c, y, -1) -> (c-1) | y
705 if (isAllOnesConstant(FalseV)) {
706 SDValue Neg =
707 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
708 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
709 }
710
711 // (select c, 0, y) -> (c-1) & y
712 if (isNullConstant(TrueV)) {
713 SDValue Neg =
714 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
715 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
716 }
717 // (select c, y, 0) -> -c & y
718 if (isNullConstant(FalseV)) {
719 SDValue Neg = DAG.getNegative(CondV, DL, VT);
720 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
721 }
722
723 // select c, ~x, x --> xor -c, x
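  // For example, (select c, -16, 15) becomes (xor (neg c), 15), since the
  // true and false values are bitwise complements of each other.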
724 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
725 const APInt &TrueVal = TrueV->getAsAPIntVal();
726 const APInt &FalseVal = FalseV->getAsAPIntVal();
727 if (~TrueVal == FalseVal) {
728 SDValue Neg = DAG.getNegative(CondV, DL, VT);
729 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
730 }
731 }
732
733 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
734 // when both truev and falsev are also setcc.
735 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
736 FalseV.getOpcode() == ISD::SETCC) {
737 SDValue LHS = CondV.getOperand(0);
738 SDValue RHS = CondV.getOperand(1);
739 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
740
741 // (select x, x, y) -> x | y
742 // (select !x, x, y) -> x & y
743 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
744 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
745 DAG.getFreeze(FalseV));
746 }
747 // (select x, y, x) -> x & y
748 // (select !x, y, x) -> x | y
749 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
750 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
751 DAG.getFreeze(TrueV), FalseV);
752 }
753 }
754
755 return SDValue();
756}
757
758// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
759// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
760// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
761// being `0` or `-1`. In such cases we can replace `select` with `and`.
762// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
763// than `c0`?
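// For example (illustrative), `(and (select cond, x, 0), c1)` becomes
// `(select cond, (and x, c1), 0)` because `binOp(c0, c1) = and(0, c1) = 0`,
// and the remaining select can then be lowered with a masking `and`.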
764static SDValue
766 const LoongArchSubtarget &Subtarget) {
767 unsigned SelOpNo = 0;
768 SDValue Sel = BO->getOperand(0);
769 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
770 SelOpNo = 1;
771 Sel = BO->getOperand(1);
772 }
773
774 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
775 return SDValue();
776
777 unsigned ConstSelOpNo = 1;
778 unsigned OtherSelOpNo = 2;
779 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
780 ConstSelOpNo = 2;
781 OtherSelOpNo = 1;
782 }
783 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
784 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
785 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
786 return SDValue();
787
788 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
789 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
790 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
791 return SDValue();
792
793 SDLoc DL(Sel);
794 EVT VT = BO->getValueType(0);
795
796 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
797 if (SelOpNo == 1)
798 std::swap(NewConstOps[0], NewConstOps[1]);
799
800 SDValue NewConstOp =
801 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
802 if (!NewConstOp)
803 return SDValue();
804
805 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
806 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
807 return SDValue();
808
809 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
810 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
811 if (SelOpNo == 1)
812 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
813 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
814
815 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
816 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
817 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
818}
819
820// Changes the condition code and swaps operands if necessary, so the SetCC
821// operation matches one of the comparisons supported directly by branches
822// in the LoongArch ISA. May adjust compares to favor compare with 0 over
823// compare with 1/-1.
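// For example, on LA64 the single-bit test (seteq (and x, 0x8000), 0) has a
// mask that does not fit a 12-bit signed immediate, so it is rewritten as
// (setge (shl x, 48), 0), moving bit 15 into the sign bit.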
825 ISD::CondCode &CC, SelectionDAG &DAG) {
826 // If this is a single bit test that can't be handled by ANDI, shift the
827 // bit to be tested to the MSB and perform a signed compare with 0.
828 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
829 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
830 isa<ConstantSDNode>(LHS.getOperand(1))) {
831 uint64_t Mask = LHS.getConstantOperandVal(1);
832 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
833 unsigned ShAmt = 0;
834 if (isPowerOf2_64(Mask)) {
835 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
836 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
837 } else {
838 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
839 }
840
841 LHS = LHS.getOperand(0);
842 if (ShAmt != 0)
843 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
844 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
845 return;
846 }
847 }
848
849 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
850 int64_t C = RHSC->getSExtValue();
851 switch (CC) {
852 default:
853 break;
854 case ISD::SETGT:
855 // Convert X > -1 to X >= 0.
856 if (C == -1) {
857 RHS = DAG.getConstant(0, DL, RHS.getValueType());
858 CC = ISD::SETGE;
859 return;
860 }
861 break;
862 case ISD::SETLT:
863 // Convert X < 1 to 0 >= X.
864 if (C == 1) {
865 RHS = LHS;
866 LHS = DAG.getConstant(0, DL, RHS.getValueType());
867 CC = ISD::SETGE;
868 return;
869 }
870 break;
871 }
872 }
873
874 switch (CC) {
875 default:
876 break;
877 case ISD::SETGT:
878 case ISD::SETLE:
879 case ISD::SETUGT:
880 case ISD::SETULE:
882 std::swap(LHS, RHS);
883 break;
884 }
885}
886
887SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
888 SelectionDAG &DAG) const {
889 SDValue CondV = Op.getOperand(0);
890 SDValue TrueV = Op.getOperand(1);
891 SDValue FalseV = Op.getOperand(2);
892 SDLoc DL(Op);
893 MVT VT = Op.getSimpleValueType();
894 MVT GRLenVT = Subtarget.getGRLenVT();
895
896 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
897 return V;
898
899 if (Op.hasOneUse()) {
900 unsigned UseOpc = Op->user_begin()->getOpcode();
901 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
902 SDNode *BinOp = *Op->user_begin();
903 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
904 DAG, Subtarget)) {
905 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
906 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
907 // may return a constant node and cause a crash in lowerSELECT.
908 if (NewSel.getOpcode() == ISD::SELECT)
909 return lowerSELECT(NewSel, DAG);
910 return NewSel;
911 }
912 }
913 }
914
915 // If the condition is not an integer SETCC which operates on GRLenVT, we need
916 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
917 // (select condv, truev, falsev)
918 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
919 if (CondV.getOpcode() != ISD::SETCC ||
920 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
921 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
922 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
923
924 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
925
926 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
927 }
928
929 // If the CondV is the output of a SETCC node which operates on GRLenVT
930 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
931 // to take advantage of the integer compare+branch instructions. i.e.: (select
932 // (setcc lhs, rhs, cc), truev, falsev)
933 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
934 SDValue LHS = CondV.getOperand(0);
935 SDValue RHS = CondV.getOperand(1);
936 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
937
938 // Special case for a select of 2 constants that have a difference of 1.
939 // Normally this is done by DAGCombine, but if the select is introduced by
940 // type legalization or op legalization, we miss it. Restricting to SETLT
941 // case for now because that is what signed saturating add/sub need.
942 // FIXME: We don't need the condition to be SETLT or even a SETCC,
943 // but we would probably want to swap the true/false values if the condition
944 // is SETGE/SETLE to avoid an XORI.
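  // For example, (select (setlt a, b), 2, 1) becomes (add (setlt a, b), 1),
  // and (select (setlt a, b), 1, 2) becomes (sub 2, (setlt a, b)).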
945 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
946 CCVal == ISD::SETLT) {
947 const APInt &TrueVal = TrueV->getAsAPIntVal();
948 const APInt &FalseVal = FalseV->getAsAPIntVal();
949 if (TrueVal - 1 == FalseVal)
950 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
951 if (TrueVal + 1 == FalseVal)
952 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
953 }
954
955 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
956 // 1 < x ? x : 1 -> 0 < x ? x : 1
957 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
958 RHS == TrueV && LHS == FalseV) {
959 LHS = DAG.getConstant(0, DL, VT);
960 // 0 <u x is the same as x != 0.
961 if (CCVal == ISD::SETULT) {
962 std::swap(LHS, RHS);
963 CCVal = ISD::SETNE;
964 }
965 }
966
967 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
968 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
969 RHS == FalseV) {
970 RHS = DAG.getConstant(0, DL, VT);
971 }
972
973 SDValue TargetCC = DAG.getCondCode(CCVal);
974
975 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
976 // (select (setcc lhs, rhs, CC), constant, falsev)
977 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
978 std::swap(TrueV, FalseV);
979 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
980 }
981
982 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
983 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
984}
985
986SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
987 SelectionDAG &DAG) const {
988 SDValue CondV = Op.getOperand(1);
989 SDLoc DL(Op);
990 MVT GRLenVT = Subtarget.getGRLenVT();
991
992 if (CondV.getOpcode() == ISD::SETCC) {
993 if (CondV.getOperand(0).getValueType() == GRLenVT) {
994 SDValue LHS = CondV.getOperand(0);
995 SDValue RHS = CondV.getOperand(1);
996 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
997
998 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
999
1000 SDValue TargetCC = DAG.getCondCode(CCVal);
1001 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1002 Op.getOperand(0), LHS, RHS, TargetCC,
1003 Op.getOperand(2));
1004 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1005 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1006 Op.getOperand(0), CondV, Op.getOperand(2));
1007 }
1008 }
1009
1010 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1011 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1012 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1013}
1014
1015SDValue
1016LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1017 SelectionDAG &DAG) const {
1018 SDLoc DL(Op);
1019 MVT OpVT = Op.getSimpleValueType();
1020
1021 SDValue Vector = DAG.getUNDEF(OpVT);
1022 SDValue Val = Op.getOperand(0);
1023 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1024
1025 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1026}
1027
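// Lower vector BITREVERSE by bitcasting to i64 elements, reversing the bits of
// each 64-bit chunk (BITREV_8B, which keeps byte positions, for i8 vectors; a
// full i64 BITREVERSE otherwise), and then shuffling the sub-elements of each
// chunk back into order for i16/i32 element types.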
1028SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1029 SelectionDAG &DAG) const {
1030 EVT ResTy = Op->getValueType(0);
1031 SDValue Src = Op->getOperand(0);
1032 SDLoc DL(Op);
1033
1034 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1035 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1036 unsigned int NewEltNum = NewVT.getVectorNumElements();
1037
1038 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1039
1041 for (unsigned int i = 0; i < NewEltNum; i++) {
1042 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1043 DAG.getConstant(i, DL, MVT::i64));
1044 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1045 ? (unsigned)LoongArchISD::BITREV_8B
1046 : (unsigned)ISD::BITREVERSE;
1047 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1048 }
1049 SDValue Res =
1050 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1051
1052 switch (ResTy.getSimpleVT().SimpleTy) {
1053 default:
1054 return SDValue();
1055 case MVT::v16i8:
1056 case MVT::v32i8:
1057 return Res;
1058 case MVT::v8i16:
1059 case MVT::v16i16:
1060 case MVT::v4i32:
1061 case MVT::v8i32: {
1063 for (unsigned int i = 0; i < NewEltNum; i++)
1064 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1065 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1066 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1067 }
1068 }
1069}
1070
1071// Widen element type to get a new mask value (if possible).
1072// For example:
1073// shufflevector <4 x i32> %a, <4 x i32> %b,
1074// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1075// is equivalent to:
1076// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1077// can be lowered to:
1078// VPACKOD_D vr0, vr0, vr1
1080 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1081 unsigned EltBits = VT.getScalarSizeInBits();
1082
1083 if (EltBits > 32 || EltBits == 1)
1084 return SDValue();
1085
1086 SmallVector<int, 8> NewMask;
1087 if (widenShuffleMaskElts(Mask, NewMask)) {
1088 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1089 : MVT::getIntegerVT(EltBits * 2);
1090 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1091 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1092 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1093 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1094 return DAG.getBitcast(
1095 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1096 }
1097 }
1098
1099 return SDValue();
1100}
1101
1102/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1103/// instruction.
1104// The function matches elements from one of the input vectors shuffled to the
1105// left or right with zeroable elements 'shifted in'. It handles both the
1106// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1107// lane.
1108// Mostly copied from X86.
1109static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1110 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1111 int MaskOffset, const APInt &Zeroable) {
1112 int Size = Mask.size();
1113 unsigned SizeInBits = Size * ScalarSizeInBits;
1114
1115 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1116 for (int i = 0; i < Size; i += Scale)
1117 for (int j = 0; j < Shift; ++j)
1118 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1119 return false;
1120
1121 return true;
1122 };
1123
1124 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1125 int Step = 1) {
1126 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1127 if (!(Mask[i] == -1 || Mask[i] == Low))
1128 return false;
1129 return true;
1130 };
1131
1132 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1133 for (int i = 0; i != Size; i += Scale) {
1134 unsigned Pos = Left ? i + Shift : i;
1135 unsigned Low = Left ? i : i + Shift;
1136 unsigned Len = Scale - Shift;
1137 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1138 return -1;
1139 }
1140
1141 int ShiftEltBits = ScalarSizeInBits * Scale;
1142 bool ByteShift = ShiftEltBits > 64;
1143 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1144 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1145 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1146
1147 // Normalize the scale for byte shifts to still produce an i64 element
1148 // type.
1149 Scale = ByteShift ? Scale / 2 : Scale;
1150
1151 // We need to round trip through the appropriate type for the shift.
1152 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1153 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1154 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1155 return (int)ShiftAmt;
1156 };
1157
1158 unsigned MaxWidth = 128;
1159 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1160 for (int Shift = 1; Shift != Scale; ++Shift)
1161 for (bool Left : {true, false})
1162 if (CheckZeros(Shift, Scale, Left)) {
1163 int ShiftAmt = MatchShift(Shift, Scale, Left);
1164 if (0 < ShiftAmt)
1165 return ShiftAmt;
1166 }
1167
1168 // no match
1169 return -1;
1170}
1171
1172/// Lower VECTOR_SHUFFLE as shift (if possible).
1173///
1174/// For example:
1175/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1176/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1177/// is lowered to:
1178/// (VBSLL_V $v0, $v0, 4)
1179///
1180/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1181/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1182/// is lowered to:
1183/// (VSLLI_D $v0, $v0, 32)
1185 MVT VT, SDValue V1, SDValue V2,
1186 SelectionDAG &DAG,
1187 const LoongArchSubtarget &Subtarget,
1188 const APInt &Zeroable) {
1189 int Size = Mask.size();
1190 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1191
1192 MVT ShiftVT;
1193 SDValue V = V1;
1194 unsigned Opcode;
1195
1196 // Try to match shuffle against V1 shift.
1197 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1198 Mask, 0, Zeroable);
1199
1200 // If V1 failed, try to match shuffle against V2 shift.
1201 if (ShiftAmt < 0) {
1202 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1203 Mask, Size, Zeroable);
1204 V = V2;
1205 }
1206
1207 if (ShiftAmt < 0)
1208 return SDValue();
1209
1210 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1211 "Illegal integer vector type");
1212 V = DAG.getBitcast(ShiftVT, V);
1213 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1214 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1215 return DAG.getBitcast(VT, V);
1216}
1217
1218/// Determine whether a range fits a regular pattern of values.
1219/// This function accounts for the possibility of jumping over the End iterator.
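/// For example, fitsRegularPattern<int>(Begin, 2, End, 0, 2) checks that the
/// elements at even positions are (up to undefs) 0, 2, 4, ...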
1220template <typename ValType>
1221static bool
1223 unsigned CheckStride,
1225 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1226 auto &I = Begin;
1227
1228 while (I != End) {
1229 if (*I != -1 && *I != ExpectedIndex)
1230 return false;
1231 ExpectedIndex += ExpectedIndexStride;
1232
1233 // Incrementing past End is undefined behaviour so we must increment one
1234 // step at a time and check for End at each step.
1235 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1236 ; // Empty loop body.
1237 }
1238 return true;
1239}
1240
1241/// Compute whether each element of a shuffle is zeroable.
1242///
1243/// A "zeroable" vector shuffle element is one which can be lowered to zero.
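/// For example, shuffling <4 x i32> %v with zeroinitializer via the mask
/// <0, 4, 1, 4> makes elements 1 and 3 zeroable, because they read lanes of an
/// all-zeros build vector.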
1245 SDValue V2, APInt &KnownUndef,
1246 APInt &KnownZero) {
1247 int Size = Mask.size();
1248 KnownUndef = KnownZero = APInt::getZero(Size);
1249
1250 V1 = peekThroughBitcasts(V1);
1251 V2 = peekThroughBitcasts(V2);
1252
1253 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1254 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1255
1256 int VectorSizeInBits = V1.getValueSizeInBits();
1257 int ScalarSizeInBits = VectorSizeInBits / Size;
1258 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1259 (void)ScalarSizeInBits;
1260
1261 for (int i = 0; i < Size; ++i) {
1262 int M = Mask[i];
1263 if (M < 0) {
1264 KnownUndef.setBit(i);
1265 continue;
1266 }
1267 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1268 KnownZero.setBit(i);
1269 continue;
1270 }
1271 }
1272}
1273
1274/// Test whether a shuffle mask is equivalent within each sub-lane.
1275///
1276/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1277/// non-trivial to compute in the face of undef lanes. The representation is
1278/// suitable for use with existing 128-bit shuffles as entries from the second
1279/// vector have been remapped to [LaneSize, 2*LaneSize).
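/// For example, the v8i32 mask <0, 9, 2, 11, 4, 13, 6, 15> repeats the 128-bit
/// lane pattern <0, 5, 2, 7>, while any mask entry that crosses a 128-bit lane
/// boundary causes the match to fail.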
1280static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1281 ArrayRef<int> Mask,
1282 SmallVectorImpl<int> &RepeatedMask) {
1283 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1284 RepeatedMask.assign(LaneSize, -1);
1285 int Size = Mask.size();
1286 for (int i = 0; i < Size; ++i) {
1287 assert(Mask[i] == -1 || Mask[i] >= 0);
1288 if (Mask[i] < 0)
1289 continue;
1290 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1291 // This entry crosses lanes, so there is no way to model this shuffle.
1292 return false;
1293
1294 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1295 // Adjust second vector indices to start at LaneSize instead of Size.
1296 int LocalM =
1297 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1298 if (RepeatedMask[i % LaneSize] < 0)
1299 // This is the first non-undef entry in this slot of a 128-bit lane.
1300 RepeatedMask[i % LaneSize] = LocalM;
1301 else if (RepeatedMask[i % LaneSize] != LocalM)
1302 // Found a mismatch with the repeated mask.
1303 return false;
1304 }
1305 return true;
1306}
1307
1308/// Attempts to match vector shuffle as byte rotation.
1310 ArrayRef<int> Mask) {
1311
1312 SDValue Lo, Hi;
1313 SmallVector<int, 16> RepeatedMask;
1314
1315 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1316 return -1;
1317
1318 int NumElts = RepeatedMask.size();
1319 int Rotation = 0;
1320 int Scale = 16 / NumElts;
1321
1322 for (int i = 0; i < NumElts; ++i) {
1323 int M = RepeatedMask[i];
1324 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1325 "Unexpected mask index.");
1326 if (M < 0)
1327 continue;
1328
1329 // Determine where a rotated vector would have started.
1330 int StartIdx = i - (M % NumElts);
1331 if (StartIdx == 0)
1332 return -1;
1333
1334 // If we found the tail of a vector the rotation must be the missing
1335 // front. If we found the head of a vector, it must be how much of the
1336 // head.
1337 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1338
1339 if (Rotation == 0)
1340 Rotation = CandidateRotation;
1341 else if (Rotation != CandidateRotation)
1342 return -1;
1343
1344 // Compute which value this mask is pointing at.
1345 SDValue MaskV = M < NumElts ? V1 : V2;
1346
1347 // Compute which of the two target values this index should be assigned
1348 // to. This reflects whether the high elements are remaining or the low
1349 // elements are remaining.
1350 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1351
1352 // Either set up this value if we've not encountered it before, or check
1353 // that it remains consistent.
1354 if (!TargetV)
1355 TargetV = MaskV;
1356 else if (TargetV != MaskV)
1357 return -1;
1358 }
1359
1360 // Check that we successfully analyzed the mask, and normalize the results.
1361 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1362 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1363 if (!Lo)
1364 Lo = Hi;
1365 else if (!Hi)
1366 Hi = Lo;
1367
1368 V1 = Lo;
1369 V2 = Hi;
1370
1371 return Rotation * Scale;
1372}
1373
1374/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1375///
1376/// For example:
1377/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1378/// <2 x i32> <i32 3, i32 0>
1379/// is lowered to:
1380/// (VBSRL_V $v1, $v1, 8)
1381/// (VBSLL_V $v0, $v0, 8)
1382/// (VOR_V $v0, $V0, $v1)
1383static SDValue
1385 SDValue V1, SDValue V2, SelectionDAG &DAG,
1386 const LoongArchSubtarget &Subtarget) {
1387
1388 SDValue Lo = V1, Hi = V2;
1389 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1390 if (ByteRotation <= 0)
1391 return SDValue();
1392
1393 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1394 Lo = DAG.getBitcast(ByteVT, Lo);
1395 Hi = DAG.getBitcast(ByteVT, Hi);
1396
1397 int LoByteShift = 16 - ByteRotation;
1398 int HiByteShift = ByteRotation;
1399 MVT GRLenVT = Subtarget.getGRLenVT();
1400
1401 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1402 DAG.getConstant(LoByteShift, DL, GRLenVT));
1403 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1404 DAG.getConstant(HiByteShift, DL, GRLenVT));
1405 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1406}
1407
1408/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1409///
1410/// For example:
1411/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1412/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1413/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1414/// is lowered to:
1415/// (VREPLI $v1, 0)
1416/// (VILVL $v0, $v1, $v0)
1418 ArrayRef<int> Mask, MVT VT,
1419 SDValue V1, SDValue V2,
1420 SelectionDAG &DAG,
1421 const APInt &Zeroable) {
1422 int Bits = VT.getSizeInBits();
1423 int EltBits = VT.getScalarSizeInBits();
1424 int NumElements = VT.getVectorNumElements();
1425
1426 if (Zeroable.isAllOnes())
1427 return DAG.getConstant(0, DL, VT);
1428
1429 // Define a helper function to check a particular ext-scale and lower to it if
1430 // valid.
1431 auto Lower = [&](int Scale) -> SDValue {
1432 SDValue InputV;
1433 bool AnyExt = true;
1434 int Offset = 0;
1435 for (int i = 0; i < NumElements; i++) {
1436 int M = Mask[i];
1437 if (M < 0)
1438 continue;
1439 if (i % Scale != 0) {
1440 // Each of the extended elements needs to be zeroable.
1441 if (!Zeroable[i])
1442 return SDValue();
1443
1444 AnyExt = false;
1445 continue;
1446 }
1447
1448 // Each of the base elements needs to be consecutive indices into the
1449 // same input vector.
1450 SDValue V = M < NumElements ? V1 : V2;
1451 M = M % NumElements;
1452 if (!InputV) {
1453 InputV = V;
1454 Offset = M - (i / Scale);
1455
1456 // These offsets can't be handled.
1457 if (Offset % (NumElements / Scale))
1458 return SDValue();
1459 } else if (InputV != V)
1460 return SDValue();
1461
1462 if (M != (Offset + (i / Scale)))
1463 return SDValue(); // Non-consecutive strided elements.
1464 }
1465
1466 // If we fail to find an input, we have a zero-shuffle which should always
1467 // have already been handled.
1468 if (!InputV)
1469 return SDValue();
1470
1471 do {
1472 unsigned VilVLoHi = LoongArchISD::VILVL;
1473 if (Offset >= (NumElements / 2)) {
1474 VilVLoHi = LoongArchISD::VILVH;
1475 Offset -= (NumElements / 2);
1476 }
1477
1478 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1479 SDValue Ext =
1480 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1481 InputV = DAG.getBitcast(InputVT, InputV);
1482 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1483 Scale /= 2;
1484 EltBits *= 2;
1485 NumElements /= 2;
1486 } while (Scale > 1);
1487 return DAG.getBitcast(VT, InputV);
1488 };
1489
1490 // Each iteration, try extending the elements half as much, but into twice as
1491 // many elements.
1492 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1493 NumExtElements *= 2) {
1494 if (SDValue V = Lower(NumElements / NumExtElements))
1495 return V;
1496 }
1497 return SDValue();
1498}
1499
1500/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1501///
1502/// VREPLVEI performs vector broadcast based on an element specified by an
1503/// integer immediate, with its mask being similar to:
1504/// <x, x, x, ...>
1505/// where x is any valid index.
1506///
1507/// When undef's appear in the mask they are treated as if they were whatever
1508/// value is necessary in order to fit the above form.
1509static SDValue
1511 SDValue V1, SDValue V2, SelectionDAG &DAG,
1512 const LoongArchSubtarget &Subtarget) {
1513 int SplatIndex = -1;
1514 for (const auto &M : Mask) {
1515 if (M != -1) {
1516 SplatIndex = M;
1517 break;
1518 }
1519 }
1520
1521 if (SplatIndex == -1)
1522 return DAG.getUNDEF(VT);
1523
1524 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1525 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1526 APInt Imm(64, SplatIndex);
1527 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1528 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1529 }
1530
1531 return SDValue();
1532}
1533
1534/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1535///
1536/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1537/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1538///
1539/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1540/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1541/// When undef's appear they are treated as if they were whatever value is
1542/// necessary in order to fit the above forms.
1543///
1544/// For example:
1545/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1546/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1547/// i32 7, i32 6, i32 5, i32 4>
1548/// is lowered to:
1549/// (VSHUF4I_H $v0, $v1, 27)
1550/// where the 27 comes from:
1551/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1552static SDValue
1554 SDValue V1, SDValue V2, SelectionDAG &DAG,
1555 const LoongArchSubtarget &Subtarget) {
1556
1557 unsigned SubVecSize = 4;
1558 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1559 SubVecSize = 2;
1560
1561 int SubMask[4] = {-1, -1, -1, -1};
1562 for (unsigned i = 0; i < SubVecSize; ++i) {
1563 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1564 int M = Mask[j];
1565
1566 // Convert from a vector index to a 4-element subvector index.
1567 // If an index refers to an element outside of the subvector, give up.
1568 if (M != -1) {
1569 M -= 4 * (j / SubVecSize);
1570 if (M < 0 || M >= 4)
1571 return SDValue();
1572 }
1573
1574 // If the mask has an undef, replace it with the current index.
1575 // Note that it might still be undef if the current index is also undef
1576 if (SubMask[i] == -1)
1577 SubMask[i] = M;
1578 // Check that non-undef values are the same as in the mask. If they
1579 // aren't then give up
1580 else if (M != -1 && M != SubMask[i])
1581 return SDValue();
1582 }
1583 }
1584
1585 // Calculate the immediate. Replace any remaining undefs with zero
1586 APInt Imm(64, 0);
1587 for (int i = SubVecSize - 1; i >= 0; --i) {
1588 int M = SubMask[i];
1589
1590 if (M == -1)
1591 M = 0;
1592
1593 Imm <<= 2;
1594 Imm |= M & 0x3;
1595 }
1596
1597 MVT GRLenVT = Subtarget.getGRLenVT();
1598
1599 // Return vshuf4i.d
1600 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1601 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1602 DAG.getConstant(Imm, DL, GRLenVT));
1603
1604 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1605 DAG.getConstant(Imm, DL, GRLenVT));
1606}
1607
1608/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1609///
1610/// VPACKEV interleaves the even elements from each vector.
1611///
1612/// It is possible to lower into VPACKEV when the mask consists of two of the
1613/// following forms interleaved:
1614/// <0, 2, 4, ...>
1615/// <n, n+2, n+4, ...>
1616/// where n is the number of elements in the vector.
1617/// For example:
1618/// <0, 0, 2, 2, 4, 4, ...>
1619/// <0, n, 2, n+2, 4, n+4, ...>
1620///
1621/// When undef's appear in the mask they are treated as if they were whatever
1622/// value is necessary in order to fit the above forms.
1624 MVT VT, SDValue V1, SDValue V2,
1625 SelectionDAG &DAG) {
1626
1627 const auto &Begin = Mask.begin();
1628 const auto &End = Mask.end();
1629 SDValue OriV1 = V1, OriV2 = V2;
1630
1631 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1632 V1 = OriV1;
1633 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1634 V1 = OriV2;
1635 else
1636 return SDValue();
1637
1638 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1639 V2 = OriV1;
1640 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1641 V2 = OriV2;
1642 else
1643 return SDValue();
1644
1645 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1646}
1647
1648/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1649///
1650/// VPACKOD interleaves the odd elements from each vector.
1651///
1652/// It is possible to lower into VPACKOD when the mask consists of two of the
1653/// following forms interleaved:
1654/// <1, 3, 5, ...>
1655/// <n+1, n+3, n+5, ...>
1656/// where n is the number of elements in the vector.
1657/// For example:
1658/// <1, 1, 3, 3, 5, 5, ...>
1659/// <1, n+1, 3, n+3, 5, n+5, ...>
1660///
1661/// When undef's appear in the mask they are treated as if they were whatever
1662/// value is necessary in order to fit the above forms.
1664 MVT VT, SDValue V1, SDValue V2,
1665 SelectionDAG &DAG) {
1666
1667 const auto &Begin = Mask.begin();
1668 const auto &End = Mask.end();
1669 SDValue OriV1 = V1, OriV2 = V2;
1670
1671 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1672 V1 = OriV1;
1673 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1674 V1 = OriV2;
1675 else
1676 return SDValue();
1677
1678 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1679 V2 = OriV1;
1680 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1681 V2 = OriV2;
1682 else
1683 return SDValue();
1684
1685 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1686}
1687
1688/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1689///
1690/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1691/// of each vector.
1692///
1693/// It is possible to lower into VILVH when the mask consists of two of the
1694/// following forms interleaved:
1695/// <x, x+1, x+2, ...>
1696/// <n+x, n+x+1, n+x+2, ...>
1697/// where n is the number of elements in the vector and x is half n.
1698/// For example:
1699/// <x, x, x+1, x+1, x+2, x+2, ...>
1700/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1701///
1702/// When undef's appear in the mask they are treated as if they were whatever
1703/// value is necessary in order to fit the above forms.
1705 MVT VT, SDValue V1, SDValue V2,
1706 SelectionDAG &DAG) {
1707
1708 const auto &Begin = Mask.begin();
1709 const auto &End = Mask.end();
1710 unsigned HalfSize = Mask.size() / 2;
1711 SDValue OriV1 = V1, OriV2 = V2;
1712
1713 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1714 V1 = OriV1;
1715 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1716 V1 = OriV2;
1717 else
1718 return SDValue();
1719
1720 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1721 V2 = OriV1;
1722 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1723 1))
1724 V2 = OriV2;
1725 else
1726 return SDValue();
1727
1728 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1729}
1730
1731/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1732///
1733/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1734/// of each vector.
1735///
1736/// It is possible to lower into VILVL when the mask consists of two of the
1737/// following forms interleaved:
1738/// <0, 1, 2, ...>
1739/// <n, n+1, n+2, ...>
1740/// where n is the number of elements in the vector.
1741/// For example:
1742/// <0, 0, 1, 1, 2, 2, ...>
1743/// <0, n, 1, n+1, 2, n+2, ...>
1744///
1745/// When undef's appear in the mask they are treated as if they were whatever
1746/// value is necessary in order to fit the above forms.
1748 MVT VT, SDValue V1, SDValue V2,
1749 SelectionDAG &DAG) {
1750
1751 const auto &Begin = Mask.begin();
1752 const auto &End = Mask.end();
1753 SDValue OriV1 = V1, OriV2 = V2;
1754
1755 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1756 V1 = OriV1;
1757 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1758 V1 = OriV2;
1759 else
1760 return SDValue();
1761
1762 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1763 V2 = OriV1;
1764 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1765 V2 = OriV2;
1766 else
1767 return SDValue();
1768
1769 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1770}
1771
1772/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1773///
1774/// VPICKEV copies the even elements of each vector into the result vector.
1775///
1776/// It is possible to lower into VPICKEV when the mask consists of two of the
1777/// following forms concatenated:
1778/// <0, 2, 4, ...>
1779/// <n, n+2, n+4, ...>
1780/// where n is the number of elements in the vector.
1781/// For example:
1782/// <0, 2, 4, ..., 0, 2, 4, ...>
1783/// <0, 2, 4, ..., n, n+2, n+4, ...>
1784///
1785/// When undef's appear in the mask they are treated as if they were whatever
1786/// value is necessary in order to fit the above forms.
1788 MVT VT, SDValue V1, SDValue V2,
1789 SelectionDAG &DAG) {
1790
1791 const auto &Begin = Mask.begin();
1792 const auto &Mid = Mask.begin() + Mask.size() / 2;
1793 const auto &End = Mask.end();
1794 SDValue OriV1 = V1, OriV2 = V2;
1795
1796 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1797 V1 = OriV1;
1798 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1799 V1 = OriV2;
1800 else
1801 return SDValue();
1802
1803 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1804 V2 = OriV1;
1805 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1806 V2 = OriV2;
1807
1808 else
1809 return SDValue();
1810
1811 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1812}
1813
1814/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1815///
1816/// VPICKOD copies the odd elements of each vector into the result vector.
1817///
1818/// It is possible to lower into VPICKOD when the mask consists of two of the
1819/// following forms concatenated:
1820/// <1, 3, 5, ...>
1821/// <n+1, n+3, n+5, ...>
1822/// where n is the number of elements in the vector.
1823/// For example:
1824/// <1, 3, 5, ..., 1, 3, 5, ...>
1825/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1826///
1827/// When undef's appear in the mask they are treated as if they were whatever
1828/// value is necessary in order to fit the above forms.
1830 MVT VT, SDValue V1, SDValue V2,
1831 SelectionDAG &DAG) {
1832
1833 const auto &Begin = Mask.begin();
1834 const auto &Mid = Mask.begin() + Mask.size() / 2;
1835 const auto &End = Mask.end();
1836 SDValue OriV1 = V1, OriV2 = V2;
1837
1838 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1839 V1 = OriV1;
1840 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1841 V1 = OriV2;
1842 else
1843 return SDValue();
1844
1845 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1846 V2 = OriV1;
1847 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1848 V2 = OriV2;
1849 else
1850 return SDValue();
1851
1852 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1853}
1854
1855/// Lower VECTOR_SHUFFLE into VSHUF.
1856///
1857/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1858/// adding it as an operand to the resulting VSHUF.
1860 MVT VT, SDValue V1, SDValue V2,
1861 SelectionDAG &DAG) {
1862
1864 for (auto M : Mask)
1865 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1866
1867 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1868 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1869
1870 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1871 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1872 // VSHF concatenates the vectors in a bitwise fashion:
1873 // <0b00, 0b01> + <0b10, 0b11> ->
1874 // 0b0100 + 0b1110 -> 0b01001110
1875 // <0b10, 0b11, 0b00, 0b01>
1876 // We must therefore swap the operands to get the correct result.
1877 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1878}
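// Worked example: for a v4i32 shuffle with mask <0, 4, 1, 5>, MaskVec is
// built as <0, 4, 1, 5> and the node is emitted as VSHUF(MaskVec, V2, V1);
// swapping the operands keeps indices 0..3 referring to V1 and 4..7 to V2,
// matching the VECTOR_SHUFFLE numbering described above.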
1879
1880/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1881///
1882/// This routine breaks down the specific type of 128-bit shuffle and
1883/// dispatches to the lowering routines accordingly.
1884static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1885 SDValue V1, SDValue V2, SelectionDAG &DAG,
1886 const LoongArchSubtarget &Subtarget) {
1887 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1888 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1889 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1890 "Vector type is unsupported for lsx!");
1892 "Two operands have different types!");
1893 assert(VT.getVectorNumElements() == Mask.size() &&
1894 "Unexpected mask size for shuffle!");
1895 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1896
1897 APInt KnownUndef, KnownZero;
1898 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1899 APInt Zeroable = KnownUndef | KnownZero;
1900
1901 SDValue Result;
1902 // TODO: Add more comparison patterns.
1903 if (V2.isUndef()) {
1904 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
1905 Subtarget)))
1906 return Result;
1907 if ((Result =
1908 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1909 return Result;
1910
1911 // TODO: This comment may be enabled in the future to better match the
1912 // pattern for instruction selection.
1913 /* V2 = V1; */
1914 }
1915
1916 // It is recommended not to change the pattern comparison order for better
1917 // performance.
1918 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1919 return Result;
1920 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1921 return Result;
1922 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1923 return Result;
1924 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1925 return Result;
1926 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1927 return Result;
1928 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1929 return Result;
1930 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1931 (Result =
1932 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1933 return Result;
1934 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1935 Zeroable)))
1936 return Result;
1937 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
1938 Zeroable)))
1939 return Result;
1940 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
1941 Subtarget)))
1942 return Result;
1943 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1944 return NewShuffle;
1945 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1946 return Result;
1947 return SDValue();
1948}
1949
1950/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1951///
1952/// It is an XVREPLVEI when the mask is:
1953/// <x, x, x, ..., x+n, x+n, x+n, ...>
1954/// where the number of x's equals n, and n is half the vector length.
1955///
1956/// When undefs appear in the mask, they are treated as whatever value is
1957/// necessary in order to fit the above form.
1958static SDValue
1959lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1960 SDValue V1, SDValue V2, SelectionDAG &DAG,
1961 const LoongArchSubtarget &Subtarget) {
1962 int SplatIndex = -1;
1963 for (const auto &M : Mask) {
1964 if (M != -1) {
1965 SplatIndex = M;
1966 break;
1967 }
1968 }
1969
1970 if (SplatIndex == -1)
1971 return DAG.getUNDEF(VT);
1972
1973 const auto &Begin = Mask.begin();
1974 const auto &End = Mask.end();
1975 unsigned HalfSize = Mask.size() / 2;
1976
1977 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1978 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1979 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1980 0)) {
1981 APInt Imm(64, SplatIndex);
1982 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1983 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1984 }
1985
1986 return SDValue();
1987}
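// Worked example: a one-input v8i32 shuffle with mask
// <2, 2, 2, 2, 6, 6, 6, 6> (possibly with some undefs) has SplatIndex 2 and
// both halves fit the required pattern, so it lowers to a VREPLVEI node with
// immediate 2.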
1988
1989/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1990static SDValue
1991lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1992 SDValue V1, SDValue V2, SelectionDAG &DAG,
1993 const LoongArchSubtarget &Subtarget) {
1994 // When the size is less than or equal to 4, lower cost instructions may be
1995 // used.
1996 if (Mask.size() <= 4)
1997 return SDValue();
1998 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
1999}
2000
2001/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2003 MVT VT, SDValue V1, SDValue V2,
2004 SelectionDAG &DAG) {
2005 // LoongArch LASX only has XVPERM_W.
2006 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2007 return SDValue();
2008
2009 unsigned NumElts = VT.getVectorNumElements();
2010 unsigned HalfSize = NumElts / 2;
2011 bool FrontLo = true, FrontHi = true;
2012 bool BackLo = true, BackHi = true;
2013
2014 auto inRange = [](int val, int low, int high) {
2015 return (val == -1) || (val >= low && val < high);
2016 };
2017
2018 for (unsigned i = 0; i < HalfSize; ++i) {
2019 int Fronti = Mask[i];
2020 int Backi = Mask[i + HalfSize];
2021
2022 FrontLo &= inRange(Fronti, 0, HalfSize);
2023 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2024 BackLo &= inRange(Backi, 0, HalfSize);
2025 BackHi &= inRange(Backi, HalfSize, NumElts);
2026 }
2027
2028 // If both the lower and upper 128-bit parts access only one half of the
2029 // vector (either lower or upper), avoid using xvperm.w. The latency of
2030 // xvperm.w (3) is higher than that of xvshuf (1) plus xvori (1).
2031 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2032 return SDValue();
2033
2034 SmallVector<SDValue, 8> Masks;
2035 for (unsigned i = 0; i < NumElts; ++i)
2036 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
2037 : DAG.getConstant(Mask[i], DL, MVT::i64));
2038 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2039
2040 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2041}
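// Worked example: the v8i32 mask <0, 4, 1, 5, 2, 6, 3, 7> mixes both halves
// of V1 inside its low 128-bit part, so it is lowered with XVPERM, whereas
// <3, 2, 1, 0, 7, 6, 5, 4> keeps each 128-bit part within a single half and
// is rejected above in favor of cheaper shuffles.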
2042
2043/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2044static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2045 MVT VT, SDValue V1, SDValue V2,
2046 SelectionDAG &DAG) {
2047 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2048}
2049
2050/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2051static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2052 MVT VT, SDValue V1, SDValue V2,
2053 SelectionDAG &DAG) {
2054 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2055}
2056
2057/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2058static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2059 MVT VT, SDValue V1, SDValue V2,
2060 SelectionDAG &DAG) {
2061
2062 const auto &Begin = Mask.begin();
2063 const auto &End = Mask.end();
2064 unsigned HalfSize = Mask.size() / 2;
2065 unsigned LeftSize = HalfSize / 2;
2066 SDValue OriV1 = V1, OriV2 = V2;
2067
2068 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2069 1) &&
2070 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2071 V1 = OriV1;
2072 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2073 Mask.size() + HalfSize - LeftSize, 1) &&
2074 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2075 Mask.size() + HalfSize + LeftSize, 1))
2076 V1 = OriV2;
2077 else
2078 return SDValue();
2079
2080 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2081 1) &&
2082 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2083 1))
2084 V2 = OriV1;
2085 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2086 Mask.size() + HalfSize - LeftSize, 1) &&
2087 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2088 Mask.size() + HalfSize + LeftSize, 1))
2089 V2 = OriV2;
2090 else
2091 return SDValue();
2092
2093 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2094}
2095
2096/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2097static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2098 MVT VT, SDValue V1, SDValue V2,
2099 SelectionDAG &DAG) {
2100
2101 const auto &Begin = Mask.begin();
2102 const auto &End = Mask.end();
2103 unsigned HalfSize = Mask.size() / 2;
2104 SDValue OriV1 = V1, OriV2 = V2;
2105
2106 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2107 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2108 V1 = OriV1;
2109 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2110 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2111 Mask.size() + HalfSize, 1))
2112 V1 = OriV2;
2113 else
2114 return SDValue();
2115
2116 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2117 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2118 V2 = OriV1;
2119 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2120 1) &&
2121 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2122 Mask.size() + HalfSize, 1))
2123 V2 = OriV2;
2124 else
2125 return SDValue();
2126
2127 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2128}
2129
2130/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2131static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2132 MVT VT, SDValue V1, SDValue V2,
2133 SelectionDAG &DAG) {
2134
2135 const auto &Begin = Mask.begin();
2136 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2137 const auto &Mid = Mask.begin() + Mask.size() / 2;
2138 const auto &RightMid = Mask.end() - Mask.size() / 4;
2139 const auto &End = Mask.end();
2140 unsigned HalfSize = Mask.size() / 2;
2141 SDValue OriV1 = V1, OriV2 = V2;
2142
2143 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2144 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2145 V1 = OriV1;
2146 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2147 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2148 V1 = OriV2;
2149 else
2150 return SDValue();
2151
2152 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2153 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2154 V2 = OriV1;
2155 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2156 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2157 V2 = OriV2;
2158
2159 else
2160 return SDValue();
2161
2162 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2163}
2164
2165/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2166static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2167 MVT VT, SDValue V1, SDValue V2,
2168 SelectionDAG &DAG) {
2169
2170 const auto &Begin = Mask.begin();
2171 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2172 const auto &Mid = Mask.begin() + Mask.size() / 2;
2173 const auto &RightMid = Mask.end() - Mask.size() / 4;
2174 const auto &End = Mask.end();
2175 unsigned HalfSize = Mask.size() / 2;
2176 SDValue OriV1 = V1, OriV2 = V2;
2177
2178 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2179 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2180 V1 = OriV1;
2181 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2182 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2183 2))
2184 V1 = OriV2;
2185 else
2186 return SDValue();
2187
2188 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2189 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2190 V2 = OriV1;
2191 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2192 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2193 2))
2194 V2 = OriV2;
2195 else
2196 return SDValue();
2197
2198 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2199}
2200
2201/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2202static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2203 MVT VT, SDValue V1, SDValue V2,
2204 SelectionDAG &DAG) {
2205
2206 int MaskSize = Mask.size();
2207 int HalfSize = Mask.size() / 2;
2208 const auto &Begin = Mask.begin();
2209 const auto &Mid = Mask.begin() + HalfSize;
2210 const auto &End = Mask.end();
2211
2212 // VECTOR_SHUFFLE concatenates the vectors:
2213 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2214 // shuffling ->
2215 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2216 //
2217 // XVSHUF concatenates the vectors:
2218 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2219 // shuffling ->
2220 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2221 SmallVector<SDValue, 8> MaskAlloc;
2222 for (auto it = Begin; it < Mid; it++) {
2223 if (*it < 0) // UNDEF
2224 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2225 else if ((*it >= 0 && *it < HalfSize) ||
2226 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2227 int M = *it < HalfSize ? *it : *it - HalfSize;
2228 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2229 } else
2230 return SDValue();
2231 }
2232 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2233
2234 for (auto it = Mid; it < End; it++) {
2235 if (*it < 0) // UNDEF
2236 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2237 else if ((*it >= HalfSize && *it < MaskSize) ||
2238 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2239 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2240 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2241 } else
2242 return SDValue();
2243 }
2244 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2245
2246 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2247 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2248 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2249}
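// Worked example: for a v8i32 shuffle with mask <0, 8, 1, 9, 4, 12, 5, 13>,
// the first half is re-encoded as <0, 4, 1, 5> and the second half as
// <0, 4, 1, 5>, so the emitted VSHUF uses the operand vector
// <0, 4, 1, 5, 0, 4, 1, 5> (with V2 and V1 swapped as in the 128-bit case).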
2250
2251/// Shuffle vectors by lane to generate more optimized instructions.
2252/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2253///
2254/// Therefore, except for the following four cases, other cases are regarded
2255/// as cross-lane shuffles, where optimization is relatively limited.
2256///
2257/// - Shuffle high, low lanes of two input vectors
2258/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2259/// - Shuffle low, high lanes of two input vectors
2260/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2261/// - Shuffle low, low lanes of two input vectors
2262/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2263/// - Shuffle high, high lanes of two input vectors
2264/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2265///
2266/// The first case is the closest to LoongArch instructions and the other
2267/// cases need to be converted to it for processing.
2268///
2269/// This function may modify V1, V2 and Mask
2270static void canonicalizeShuffleVectorByLane(
2271 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2272 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2273
2274 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2275
2276 int MaskSize = Mask.size();
2277 int HalfSize = Mask.size() / 2;
2278 MVT GRLenVT = Subtarget.getGRLenVT();
2279
2280 HalfMaskType preMask = None, postMask = None;
2281
2282 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2283 return M < 0 || (M >= 0 && M < HalfSize) ||
2284 (M >= MaskSize && M < MaskSize + HalfSize);
2285 }))
2286 preMask = HighLaneTy;
2287 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2288 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2289 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2290 }))
2291 preMask = LowLaneTy;
2292
2293 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2294 return M < 0 || (M >= 0 && M < HalfSize) ||
2295 (M >= MaskSize && M < MaskSize + HalfSize);
2296 }))
2297 postMask = HighLaneTy;
2298 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2299 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2300 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2301 }))
2302 postMask = LowLaneTy;
2303
2304 // The pre-half of mask is high lane type, and the post-half of mask
2305 // is low lane type, which is closest to the LoongArch instructions.
2306 //
2307 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2308 // to the lower 128 bits of the vector register, and the low lane of the mask
2309 // corresponds to the higher 128 bits of the vector register.
2310 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2311 return;
2312 }
2313 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2314 V1 = DAG.getBitcast(MVT::v4i64, V1);
2315 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2316 DAG.getConstant(0b01001110, DL, GRLenVT));
2317 V1 = DAG.getBitcast(VT, V1);
2318
2319 if (!V2.isUndef()) {
2320 V2 = DAG.getBitcast(MVT::v4i64, V2);
2321 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2322 DAG.getConstant(0b01001110, DL, GRLenVT));
2323 V2 = DAG.getBitcast(VT, V2);
2324 }
2325
2326 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2327 *it = *it < 0 ? *it : *it - HalfSize;
2328 }
2329 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2330 *it = *it < 0 ? *it : *it + HalfSize;
2331 }
2332 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2333 V1 = DAG.getBitcast(MVT::v4i64, V1);
2334 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2335 DAG.getConstant(0b11101110, DL, GRLenVT));
2336 V1 = DAG.getBitcast(VT, V1);
2337
2338 if (!V2.isUndef()) {
2339 V2 = DAG.getBitcast(MVT::v4i64, V2);
2340 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2341 DAG.getConstant(0b11101110, DL, GRLenVT));
2342 V2 = DAG.getBitcast(VT, V2);
2343 }
2344
2345 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2346 *it = *it < 0 ? *it : *it - HalfSize;
2347 }
2348 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2349 V1 = DAG.getBitcast(MVT::v4i64, V1);
2350 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2351 DAG.getConstant(0b01000100, DL, GRLenVT));
2352 V1 = DAG.getBitcast(VT, V1);
2353
2354 if (!V2.isUndef()) {
2355 V2 = DAG.getBitcast(MVT::v4i64, V2);
2356 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2357 DAG.getConstant(0b01000100, DL, GRLenVT));
2358 V2 = DAG.getBitcast(VT, V2);
2359 }
2360
2361 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2362 *it = *it < 0 ? *it : *it + HalfSize;
2363 }
2364 } else { // cross-lane
2365 return;
2366 }
2367}
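// Worked example: for a one-input v8i32 shuffle with mask
// <4, 5, 6, 7, 0, 1, 2, 3>, the first half reads V1's high lane and the
// second half its low lane, so V1's 128-bit lanes are swapped with XVPERMI
// (imm 0b01001110) and the mask is rewritten to <0, 1, 2, 3, 4, 5, 6, 7>.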
2368
2369/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2370/// Only for 256-bit vector.
2371///
2372/// For example:
2373/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2374/// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2375/// is lowered to:
2376/// (XVPERMI $xr2, $xr0, 78)
2377/// (XVSHUF $xr1, $xr2, $xr0)
2378/// (XVORI $xr0, $xr1, 0)
2380 ArrayRef<int> Mask,
2381 MVT VT, SDValue V1,
2382 SDValue V2,
2383 SelectionDAG &DAG) {
2384 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2385 int Size = Mask.size();
2386 int LaneSize = Size / 2;
2387
2388 bool LaneCrossing[2] = {false, false};
2389 for (int i = 0; i < Size; ++i)
2390 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2391 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2392
2393 // Bail out if the shuffle does not actually cross 128-bit lanes.
2394 if (!LaneCrossing[0] && !LaneCrossing[1])
2395 return SDValue();
2396
2397 SmallVector<int> InLaneMask;
2398 InLaneMask.assign(Mask.begin(), Mask.end());
2399 for (int i = 0; i < Size; ++i) {
2400 int &M = InLaneMask[i];
2401 if (M < 0)
2402 continue;
2403 if (((M % Size) / LaneSize) != (i / LaneSize))
2404 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2405 }
2406
2407 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2408 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2409 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2410 Flipped = DAG.getBitcast(VT, Flipped);
2411 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2412}
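// Worked example: for the v4i64 case above with mask <0, 3, 2, 0>, the
// cross-lane entries are redirected to the lane-swapped copy (Flipped),
// giving the in-lane mask <0, 5, 2, 6>, where indices 4..7 select from
// Flipped.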
2413
2414/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2415///
2416/// This routine breaks down the specific type of 256-bit shuffle and
2417/// dispatches to the lowering routines accordingly.
2418static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2419 SDValue V1, SDValue V2, SelectionDAG &DAG,
2420 const LoongArchSubtarget &Subtarget) {
2421 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2422 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2423 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2424 "Vector type is unsupported for lasx!");
2426 "Two operands have different types!");
2427 assert(VT.getVectorNumElements() == Mask.size() &&
2428 "Unexpected mask size for shuffle!");
2429 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2430 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2431
2432 // Canonicalize non-cross-lane shuffle vectors.
2433 SmallVector<int> NewMask(Mask);
2434 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2435
2436 APInt KnownUndef, KnownZero;
2437 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2438 APInt Zeroable = KnownUndef | KnownZero;
2439
2440 SDValue Result;
2441 // TODO: Add more comparison patterns.
2442 if (V2.isUndef()) {
2443 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2444 Subtarget)))
2445 return Result;
2446 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2447 Subtarget)))
2448 return Result;
2449 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2450 return Result;
2451 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2452 V1, V2, DAG)))
2453 return Result;
2454
2455 // TODO: This comment may be enabled in the future to better match the
2456 // pattern for instruction selection.
2457 /* V2 = V1; */
2458 }
2459
2460 // It is recommended not to change the pattern comparison order for better
2461 // performance.
2462 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2463 return Result;
2464 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2465 return Result;
2466 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2467 return Result;
2468 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2469 return Result;
2470 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2471 return Result;
2472 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2473 return Result;
2474 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2475 Subtarget, Zeroable)))
2476 return Result;
2477 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2478 Subtarget)))
2479 return Result;
2480 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2481 return NewShuffle;
2482 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2483 return Result;
2484
2485 return SDValue();
2486}
2487
2488SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2489 SelectionDAG &DAG) const {
2490 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2491 ArrayRef<int> OrigMask = SVOp->getMask();
2492 SDValue V1 = Op.getOperand(0);
2493 SDValue V2 = Op.getOperand(1);
2494 MVT VT = Op.getSimpleValueType();
2495 int NumElements = VT.getVectorNumElements();
2496 SDLoc DL(Op);
2497
2498 bool V1IsUndef = V1.isUndef();
2499 bool V2IsUndef = V2.isUndef();
2500 if (V1IsUndef && V2IsUndef)
2501 return DAG.getUNDEF(VT);
2502
2503 // When we create a shuffle node we put the UNDEF node as the second operand,
2504 // but in some cases the first operand may be transformed to UNDEF.
2505 // In this case we should just commute the node.
2506 if (V1IsUndef)
2507 return DAG.getCommutedVectorShuffle(*SVOp);
2508
2509 // Check for non-undef masks pointing at an undef vector and make the masks
2510 // undef as well. This makes it easier to match the shuffle based solely on
2511 // the mask.
2512 if (V2IsUndef &&
2513 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2514 SmallVector<int, 8> NewMask(OrigMask);
2515 for (int &M : NewMask)
2516 if (M >= NumElements)
2517 M = -1;
2518 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2519 }
2520
2521 // Check for illegal shuffle mask element index values.
2522 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2523 (void)MaskUpperLimit;
2524 assert(llvm::all_of(OrigMask,
2525 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2526 "Out of bounds shuffle index");
2527
2528 // For each vector width, delegate to a specialized lowering routine.
2529 if (VT.is128BitVector())
2530 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2531
2532 if (VT.is256BitVector())
2533 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2534
2535 return SDValue();
2536}
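// Worked example: with V2 undef and a v4i32 mask such as <0, 5, 2, 7>, the
// entries pointing into V2 are rewritten to -1, giving <0, -1, 2, -1> before
// the width-specific lowering runs.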
2537
2538SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2539 SelectionDAG &DAG) const {
2540 // Custom lower to ensure the libcall return is passed in an FPR on hard
2541 // float ABIs.
2542 SDLoc DL(Op);
2543 MakeLibCallOptions CallOptions;
2544 SDValue Op0 = Op.getOperand(0);
2545 SDValue Chain = SDValue();
2546 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2547 SDValue Res;
2548 std::tie(Res, Chain) =
2549 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2550 if (Subtarget.is64Bit())
2551 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2552 return DAG.getBitcast(MVT::i32, Res);
2553}
2554
2555SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2556 SelectionDAG &DAG) const {
2557 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2558 // float ABIs.
2559 SDLoc DL(Op);
2560 MakeLibCallOptions CallOptions;
2561 SDValue Op0 = Op.getOperand(0);
2562 SDValue Chain = SDValue();
2563 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2564 DL, MVT::f32, Op0)
2565 : DAG.getBitcast(MVT::f32, Op0);
2566 SDValue Res;
2567 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2568 CallOptions, DL, Chain);
2569 return Res;
2570}
2571
2572SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2573 SelectionDAG &DAG) const {
2574 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2575 SDLoc DL(Op);
2576 MakeLibCallOptions CallOptions;
2577 RTLIB::Libcall LC =
2578 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2579 SDValue Res =
2580 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2581 if (Subtarget.is64Bit())
2582 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2583 return DAG.getBitcast(MVT::i32, Res);
2584}
2585
2586SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2587 SelectionDAG &DAG) const {
2588 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2589 MVT VT = Op.getSimpleValueType();
2590 SDLoc DL(Op);
2591 Op = DAG.getNode(
2592 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2593 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2594 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2595 DL, MVT::f32, Op)
2596 : DAG.getBitcast(MVT::f32, Op);
2597 if (VT != MVT::f32)
2598 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2599 return Res;
2600}
2601
2602// Lower BUILD_VECTOR as broadcast load (if possible).
2603// For example:
2604// %a = load i8, ptr %ptr
2605// %b = build_vector %a, %a, %a, %a
2606// is lowered to:
2607// (VLDREPL_B $a0, 0)
2608static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2609 const SDLoc &DL,
2610 SelectionDAG &DAG) {
2611 MVT VT = BVOp->getSimpleValueType(0);
2612 int NumOps = BVOp->getNumOperands();
2613
2614 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2615 "Unsupported vector type for broadcast.");
2616
2617 SDValue IdentitySrc;
2618 bool IsIdentity = true;
2619
2620 for (int i = 0; i != NumOps; i++) {
2621 SDValue Op = BVOp->getOperand(i);
2622 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2623 IsIdentity = false;
2624 break;
2625 }
2626 IdentitySrc = BVOp->getOperand(0);
2627 }
2628
2629 // Make sure that this load is valid and only has one user.
2630 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2631 return SDValue();
2632
2633 auto *LN = cast<LoadSDNode>(IdentitySrc);
2634 auto ExtType = LN->getExtensionType();
2635
2636 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2637 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2638 SDVTList Tys =
2639 LN->isIndexed()
2640 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2641 : DAG.getVTList(VT, MVT::Other);
2642 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2643 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2644 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2645 return BCast;
2646 }
2647 return SDValue();
2648}
2649
2650SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2651 SelectionDAG &DAG) const {
2652 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2653 EVT ResTy = Op->getValueType(0);
2654 unsigned NumElts = ResTy.getVectorNumElements();
2655 SDLoc DL(Op);
2656 APInt SplatValue, SplatUndef;
2657 unsigned SplatBitSize;
2658 bool HasAnyUndefs;
2659 bool IsConstant = false;
2660 bool UseSameConstant = true;
2661 SDValue ConstantValue;
2662 bool Is128Vec = ResTy.is128BitVector();
2663 bool Is256Vec = ResTy.is256BitVector();
2664
2665 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2666 (!Subtarget.hasExtLASX() || !Is256Vec))
2667 return SDValue();
2668
2669 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2670 return Result;
2671
2672 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2673 /*MinSplatBits=*/8) &&
2674 SplatBitSize <= 64) {
2675 // We can only cope with 8, 16, 32, or 64-bit elements.
2676 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2677 SplatBitSize != 64)
2678 return SDValue();
2679
2680 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2681 // We can only handle 64-bit elements that are within
2682 // the signed 10-bit range on 32-bit targets.
2683 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2684 if (!SplatValue.isSignedIntN(10))
2685 return SDValue();
2686 if ((Is128Vec && ResTy == MVT::v4i32) ||
2687 (Is256Vec && ResTy == MVT::v8i32))
2688 return Op;
2689 }
2690
2691 EVT ViaVecTy;
2692
2693 switch (SplatBitSize) {
2694 default:
2695 return SDValue();
2696 case 8:
2697 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2698 break;
2699 case 16:
2700 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2701 break;
2702 case 32:
2703 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2704 break;
2705 case 64:
2706 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2707 break;
2708 }
2709
2710 // SelectionDAG::getConstant will promote SplatValue appropriately.
2711 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2712
2713 // Bitcast to the type we originally wanted.
2714 if (ViaVecTy != ResTy)
2715 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2716
2717 return Result;
2718 }
2719
2720 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2721 return Op;
2722
2723 for (unsigned i = 0; i < NumElts; ++i) {
2724 SDValue Opi = Node->getOperand(i);
2725 if (isIntOrFPConstant(Opi)) {
2726 IsConstant = true;
2727 if (!ConstantValue.getNode())
2728 ConstantValue = Opi;
2729 else if (ConstantValue != Opi)
2730 UseSameConstant = false;
2731 }
2732 }
2733
2734 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2735 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2736 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2737 for (unsigned i = 0; i < NumElts; ++i) {
2738 SDValue Opi = Node->getOperand(i);
2739 if (!isIntOrFPConstant(Opi))
2740 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2741 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2742 }
2743 return Result;
2744 }
2745
2746 if (!IsConstant) {
2747 // Use INSERT_VECTOR_ELT operations rather than expanding to stores.
2748 // The resulting code is the same length as the expansion, but it doesn't
2749 // use memory operations.
2750 assert(ResTy.isVector());
2751
2752 SDValue Op0 = Node->getOperand(0);
2753 SDValue Vector = DAG.getUNDEF(ResTy);
2754
2755 if (!Op0.isUndef())
2756 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2757 for (unsigned i = 1; i < NumElts; ++i) {
2758 SDValue Opi = Node->getOperand(i);
2759 if (Opi.isUndef())
2760 continue;
2761 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2762 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2763 }
2764 return Vector;
2765 }
2766
2767 return SDValue();
2768}
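// Worked example: a v4i32 build_vector of (7, 7, X, 7), where X is the only
// non-constant element, is lowered above as a constant splat of 7 followed by
// a single INSERT_VECTOR_ELT of X at index 2.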
2769
2770SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2771 SelectionDAG &DAG) const {
2772 SDLoc DL(Op);
2773 MVT ResVT = Op.getSimpleValueType();
2774 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2775
2776 unsigned NumOperands = Op.getNumOperands();
2777 unsigned NumFreezeUndef = 0;
2778 unsigned NumZero = 0;
2779 unsigned NumNonZero = 0;
2780 unsigned NonZeros = 0;
2781 SmallSet<SDValue, 4> Undefs;
2782 for (unsigned i = 0; i != NumOperands; ++i) {
2783 SDValue SubVec = Op.getOperand(i);
2784 if (SubVec.isUndef())
2785 continue;
2786 if (ISD::isFreezeUndef(SubVec.getNode())) {
2787 // If the freeze(undef) has multiple uses then we must fold to zero.
2788 if (SubVec.hasOneUse()) {
2789 ++NumFreezeUndef;
2790 } else {
2791 ++NumZero;
2792 Undefs.insert(SubVec);
2793 }
2794 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2795 ++NumZero;
2796 else {
2797 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2798 NonZeros |= 1 << i;
2799 ++NumNonZero;
2800 }
2801 }
2802
2803 // If we have more than 2 non-zeros, build each half separately.
2804 if (NumNonZero > 2) {
2805 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2806 ArrayRef<SDUse> Ops = Op->ops();
2807 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2808 Ops.slice(0, NumOperands / 2));
2809 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2810 Ops.slice(NumOperands / 2));
2811 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2812 }
2813
2814 // Otherwise, build it up through insert_subvectors.
2815 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2816 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2817 : DAG.getUNDEF(ResVT));
2818
2819 // Replace Undef operands with ZeroVector.
2820 for (SDValue U : Undefs)
2821 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2822
2823 MVT SubVT = Op.getOperand(0).getSimpleValueType();
2824 unsigned NumSubElems = SubVT.getVectorNumElements();
2825 for (unsigned i = 0; i != NumOperands; ++i) {
2826 if ((NonZeros & (1 << i)) == 0)
2827 continue;
2828
2829 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2830 DAG.getVectorIdxConstant(i * NumSubElems, DL));
2831 }
2832
2833 return Vec;
2834}
2835
2836SDValue
2837LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2838 SelectionDAG &DAG) const {
2839 MVT EltVT = Op.getSimpleValueType();
2840 SDValue Vec = Op->getOperand(0);
2841 EVT VecTy = Vec->getValueType(0);
2842 SDValue Idx = Op->getOperand(1);
2843 SDLoc DL(Op);
2844 MVT GRLenVT = Subtarget.getGRLenVT();
2845
2846 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
2847
2848 if (isa<ConstantSDNode>(Idx))
2849 return Op;
2850
2851 switch (VecTy.getSimpleVT().SimpleTy) {
2852 default:
2853 llvm_unreachable("Unexpected type");
2854 case MVT::v32i8:
2855 case MVT::v16i16:
2856 case MVT::v4i64:
2857 case MVT::v4f64: {
2858 // Extract the high half subvector and place it in the low half of a new
2859 // vector. It doesn't matter what the high half of the new vector is.
2860 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
2861 SDValue VecHi =
2862 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
2863 SDValue TmpVec =
2864 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
2865 VecHi, DAG.getConstant(0, DL, GRLenVT));
2866
2867 // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
2868 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
2869 // desired element.
2870 SDValue IdxCp =
2871 DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx);
2872 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
2873 SDValue MaskVec =
2874 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
2875 SDValue ResVec =
2876 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
2877
2878 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
2879 DAG.getConstant(0, DL, GRLenVT));
2880 }
2881 case MVT::v8i32:
2882 case MVT::v8f32: {
2883 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
2884 SDValue SplatValue =
2885 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
2886
2887 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
2888 DAG.getConstant(0, DL, GRLenVT));
2889 }
2890 }
2891}
2892
2893SDValue
2894LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2895 SelectionDAG &DAG) const {
2896 MVT VT = Op.getSimpleValueType();
2897 MVT EltVT = VT.getVectorElementType();
2898 unsigned NumElts = VT.getVectorNumElements();
2899 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
2900 SDLoc DL(Op);
2901 SDValue Op0 = Op.getOperand(0);
2902 SDValue Op1 = Op.getOperand(1);
2903 SDValue Op2 = Op.getOperand(2);
2904
2905 if (isa<ConstantSDNode>(Op2))
2906 return Op;
2907
2908 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
2909 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
2910
2911 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
2912 return SDValue();
2913
2914 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
2915 SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
2916
2917 SmallVector<SDValue, 32> RawIndices;
2918 for (unsigned i = 0; i < NumElts; ++i)
2919 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2920 SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
2921
2922 // insert vec, elt, idx
2923 // =>
2924 // select (splatidx == {0,1,2...}) ? splatelt : vec
2925 SDValue SelectCC =
2926 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
2927 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
2928}
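// Worked example: for a v4i32 insert at a variable index that is 2 at run
// time, SplatIdx == <0, 1, 2, 3> is true only in lane 2, so the VSELECT takes
// the splatted element there and keeps Op0 in every other lane.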
2929
2930SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2931 SelectionDAG &DAG) const {
2932 SDLoc DL(Op);
2933 SyncScope::ID FenceSSID =
2934 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
2935
2936 // singlethread fences only synchronize with signal handlers on the same
2937 // thread and thus only need to preserve instruction order, not actually
2938 // enforce memory ordering.
2939 if (FenceSSID == SyncScope::SingleThread)
2940 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
2941 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
2942
2943 return Op;
2944}
2945
2946SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
2947 SelectionDAG &DAG) const {
2948
2949 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
2950 DAG.getContext()->emitError(
2951 "On LA64, only 64-bit registers can be written.");
2952 return Op.getOperand(0);
2953 }
2954
2955 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
2956 DAG.getContext()->emitError(
2957 "On LA32, only 32-bit registers can be written.");
2958 return Op.getOperand(0);
2959 }
2960
2961 return Op;
2962}
2963
2964SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
2965 SelectionDAG &DAG) const {
2966 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
2967 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
2968 "be a constant integer");
2969 return SDValue();
2970 }
2971
2972 MachineFunction &MF = DAG.getMachineFunction();
2973 MF.getFrameInfo().setFrameAddressIsTaken(true);
2974 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
2975 EVT VT = Op.getValueType();
2976 SDLoc DL(Op);
2977 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2978 unsigned Depth = Op.getConstantOperandVal(0);
2979 int GRLenInBytes = Subtarget.getGRLen() / 8;
2980
2981 while (Depth--) {
2982 int Offset = -(GRLenInBytes * 2);
2983 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2984 DAG.getSignedConstant(Offset, DL, VT));
2985 FrameAddr =
2986 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2987 }
2988 return FrameAddr;
2989}
2990
2991SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
2992 SelectionDAG &DAG) const {
2993 // Currently we only support lowering the return address for the current frame.
2994 if (Op.getConstantOperandVal(0) != 0) {
2995 DAG.getContext()->emitError(
2996 "return address can only be determined for the current frame");
2997 return SDValue();
2998 }
2999
3000 MachineFunction &MF = DAG.getMachineFunction();
3001 MF.getFrameInfo().setReturnAddressIsTaken(true);
3002 MVT GRLenVT = Subtarget.getGRLenVT();
3003
3004 // Return the value of the return address register, marking it an implicit
3005 // live-in.
3006 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3007 getRegClassFor(GRLenVT));
3008 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3009}
3010
3011SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3012 SelectionDAG &DAG) const {
3013 MachineFunction &MF = DAG.getMachineFunction();
3014 auto Size = Subtarget.getGRLen() / 8;
3015 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3016 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3017}
3018
3019SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3020 SelectionDAG &DAG) const {
3021 MachineFunction &MF = DAG.getMachineFunction();
3022 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3023
3024 SDLoc DL(Op);
3025 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3026 getPointerTy(MF.getDataLayout()));
3027
3028 // vastart just stores the address of the VarArgsFrameIndex slot into the
3029 // memory location argument.
3030 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3031 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3032 MachinePointerInfo(SV));
3033}
3034
3035SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3036 SelectionDAG &DAG) const {
3037 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3038 !Subtarget.hasBasicD() && "unexpected target features");
3039
3040 SDLoc DL(Op);
3041 SDValue Op0 = Op.getOperand(0);
3042 if (Op0->getOpcode() == ISD::AND) {
3043 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3044 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3045 return Op;
3046 }
3047
3048 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3049 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3050 Op0.getConstantOperandVal(2) == UINT64_C(0))
3051 return Op;
3052
3053 if (Op0.getOpcode() == ISD::AssertZext &&
3054 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3055 return Op;
3056
3057 EVT OpVT = Op0.getValueType();
3058 EVT RetVT = Op.getValueType();
3059 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3060 MakeLibCallOptions CallOptions;
3061 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3062 SDValue Chain = SDValue();
3063 SDValue Result;
3064 std::tie(Result, Chain) =
3065 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3066 return Result;
3067}
3068
3069SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3070 SelectionDAG &DAG) const {
3071 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3072 !Subtarget.hasBasicD() && "unexpected target features");
3073
3074 SDLoc DL(Op);
3075 SDValue Op0 = Op.getOperand(0);
3076
3077 if ((Op0.getOpcode() == ISD::AssertSext ||
3078 Op0.getOpcode() == ISD::AssertZext) &&
3079 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3080 return Op;
3081
3082 EVT OpVT = Op0.getValueType();
3083 EVT RetVT = Op.getValueType();
3084 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3085 MakeLibCallOptions CallOptions;
3086 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3087 SDValue Chain = SDValue();
3088 SDValue Result;
3089 std::tie(Result, Chain) =
3090 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3091 return Result;
3092}
3093
3094SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3095 SelectionDAG &DAG) const {
3096
3097 SDLoc DL(Op);
3098 EVT VT = Op.getValueType();
3099 SDValue Op0 = Op.getOperand(0);
3100 EVT Op0VT = Op0.getValueType();
3101
3102 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3103 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3104 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3105 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3106 }
3107 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3108 SDValue Lo, Hi;
3109 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3110 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3111 }
3112 return Op;
3113}
3114
3115SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3116 SelectionDAG &DAG) const {
3117
3118 SDLoc DL(Op);
3119 SDValue Op0 = Op.getOperand(0);
3120
3121 if (Op0.getValueType() == MVT::f16)
3122 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3123
3124 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3125 !Subtarget.hasBasicD()) {
3126 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3127 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3128 }
3129
3130 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3131 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3132 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3133}
3134
3135static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3136 SelectionDAG &DAG, unsigned Flags) {
3137 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3138}
3139
3140static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3141 SelectionDAG &DAG, unsigned Flags) {
3142 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3143 Flags);
3144}
3145
3146static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3147 SelectionDAG &DAG, unsigned Flags) {
3148 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3149 N->getOffset(), Flags);
3150}
3151
3152static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3153 SelectionDAG &DAG, unsigned Flags) {
3154 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3155}
3156
3157template <class NodeTy>
3158SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3159 CodeModel::Model M,
3160 bool IsLocal) const {
3161 SDLoc DL(N);
3162 EVT Ty = getPointerTy(DAG.getDataLayout());
3163 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3164 SDValue Load;
3165
3166 switch (M) {
3167 default:
3168 report_fatal_error("Unsupported code model");
3169
3170 case CodeModel::Large: {
3171 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3172
3173 // This is not actually used, but is necessary for successfully matching
3174 // the PseudoLA_*_LARGE nodes.
3175 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3176 if (IsLocal) {
3177 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3178 // eventually becomes the desired 5-insn code sequence.
3179 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3180 Tmp, Addr),
3181 0);
3182 } else {
3183 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3184 // eventually becomes the desired 5-insn code sequence.
3185 Load = SDValue(
3186 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3187 0);
3188 }
3189 break;
3190 }
3191
3192 case CodeModel::Small:
3193 case CodeModel::Medium:
3194 if (IsLocal) {
3195 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3196 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3197 Load = SDValue(
3198 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3199 } else {
3200 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3201 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3202 Load =
3203 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3204 }
3205 }
3206
3207 if (!IsLocal) {
3208 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3209 MachineFunction &MF = DAG.getMachineFunction();
3210 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3211 MachinePointerInfo::getGOT(MF),
3212 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3213 MachineMemOperand::MOInvariant,
3214 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3215 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3216 }
3217
3218 return Load;
3219}
3220
3221SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3222 SelectionDAG &DAG) const {
3223 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3224 DAG.getTarget().getCodeModel());
3225}
3226
3227SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3228 SelectionDAG &DAG) const {
3229 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3230 DAG.getTarget().getCodeModel());
3231}
3232
3233SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3234 SelectionDAG &DAG) const {
3235 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3236 DAG.getTarget().getCodeModel());
3237}
3238
3239SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3240 SelectionDAG &DAG) const {
3241 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3242 assert(N->getOffset() == 0 && "unexpected offset in global node");
3243 auto CM = DAG.getTarget().getCodeModel();
3244 const GlobalValue *GV = N->getGlobal();
3245
3246 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3247 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3248 CM = *GCM;
3249 }
3250
3251 return getAddr(N, DAG, CM, GV->isDSOLocal());
3252}
3253
3254SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3255 SelectionDAG &DAG,
3256 unsigned Opc, bool UseGOT,
3257 bool Large) const {
3258 SDLoc DL(N);
3259 EVT Ty = getPointerTy(DAG.getDataLayout());
3260 MVT GRLenVT = Subtarget.getGRLenVT();
3261
3262 // This is not actually used, but is necessary for successfully matching the
3263 // PseudoLA_*_LARGE nodes.
3264 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3265 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3266
3267 // Only IE needs an extra argument for large code model.
3268 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3269 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3270 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3271
3272 // If it is LE for normal/medium code model, the add tp operation will occur
3273 // during the pseudo-instruction expansion.
3274 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3275 return Offset;
3276
3277 if (UseGOT) {
3278 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3279 MachineFunction &MF = DAG.getMachineFunction();
3280 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3281 MachinePointerInfo::getGOT(MF),
3282 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3283 MachineMemOperand::MOInvariant,
3284 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3285 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3286 }
3287
3288 // Add the thread pointer.
3289 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3290 DAG.getRegister(LoongArch::R2, GRLenVT));
3291}
3292
3293SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3294 SelectionDAG &DAG,
3295 unsigned Opc,
3296 bool Large) const {
3297 SDLoc DL(N);
3298 EVT Ty = getPointerTy(DAG.getDataLayout());
3299 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3300
3301 // This is not actually used, but is necessary for successfully matching the
3302 // PseudoLA_*_LARGE nodes.
3303 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3304
3305 // Use a PC-relative addressing mode to access the dynamic GOT address.
3306 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3307 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3308 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3309
3310 // Prepare argument list to generate call.
3311 TargetLowering::ArgListTy Args;
3312 Args.emplace_back(Load, CallTy);
3313
3314 // Setup call to __tls_get_addr.
3315 TargetLowering::CallLoweringInfo CLI(DAG);
3316 CLI.setDebugLoc(DL)
3317 .setChain(DAG.getEntryNode())
3318 .setLibCallee(CallingConv::C, CallTy,
3319 DAG.getExternalSymbol("__tls_get_addr", Ty),
3320 std::move(Args));
3321
3322 return LowerCallTo(CLI).first;
3323}
3324
3325SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3326 SelectionDAG &DAG, unsigned Opc,
3327 bool Large) const {
3328 SDLoc DL(N);
3329 EVT Ty = getPointerTy(DAG.getDataLayout());
3330 const GlobalValue *GV = N->getGlobal();
3331
3332 // This is not actually used, but is necessary for successfully matching the
3333 // PseudoLA_*_LARGE nodes.
3334 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3335
3336 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3337 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3338 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3339 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3340 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3341}
3342
3343SDValue
3344LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3345 SelectionDAG &DAG) const {
3348 report_fatal_error("In GHC calling convention TLS is not supported");
3349
3350 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3351 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3352
3353 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3354 assert(N->getOffset() == 0 && "unexpected offset in global node");
3355
3356 if (DAG.getTarget().useEmulatedTLS())
3357 reportFatalUsageError("the emulated TLS is prohibited");
3358
3359 bool IsDesc = DAG.getTarget().useTLSDESC();
3360
3361 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3362 case TLSModel::GeneralDynamic:
3363 // In this model, application code calls the dynamic linker function
3364 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3365 // runtime.
3366 if (!IsDesc)
3367 return getDynamicTLSAddr(N, DAG,
3368 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3369 : LoongArch::PseudoLA_TLS_GD,
3370 Large);
3371 break;
3372 case TLSModel::LocalDynamic:
3373 // Same as GeneralDynamic, except for assembly modifiers and relocation
3374 // records.
3375 if (!IsDesc)
3376 return getDynamicTLSAddr(N, DAG,
3377 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3378 : LoongArch::PseudoLA_TLS_LD,
3379 Large);
3380 break;
3381 case TLSModel::InitialExec:
3382 // This model uses the GOT to resolve TLS offsets.
3383 return getStaticTLSAddr(N, DAG,
3384 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3385 : LoongArch::PseudoLA_TLS_IE,
3386 /*UseGOT=*/true, Large);
3387 case TLSModel::LocalExec:
3388 // This model is used when static linking as the TLS offsets are resolved
3389 // during program linking.
3390 //
3391 // This node doesn't need an extra argument for the large code model.
3392 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3393 /*UseGOT=*/false, Large);
3394 }
3395
3396 return getTLSDescAddr(N, DAG,
3397 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3398 : LoongArch::PseudoLA_TLS_DESC,
3399 Large);
3400}
3401
3402template <unsigned N>
3403static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3404 SelectionDAG &DAG, bool IsSigned = false) {
3405 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3406 // Check the ImmArg.
3407 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3408 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3409 DAG.getContext()->emitError(Op->getOperationName(0) +
3410 ": argument out of range.");
3411 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3412 }
3413 return SDValue();
3414}
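// Worked example: loongarch_lsx_vreplvei_w is validated below with
// checkIntrinsicImmArg<2>, so its lane-index immediate must be an unsigned
// 2-bit value (0..3 for a v4i32); out-of-range values emit the
// "argument out of range" diagnostic and lower to UNDEF.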
3415
3416SDValue
3417LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3418 SelectionDAG &DAG) const {
3419 switch (Op.getConstantOperandVal(0)) {
3420 default:
3421 return SDValue(); // Don't custom lower most intrinsics.
3422 case Intrinsic::thread_pointer: {
3423 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3424 return DAG.getRegister(LoongArch::R2, PtrVT);
3425 }
3426 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3427 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3428 case Intrinsic::loongarch_lsx_vreplvei_d:
3429 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3430 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3431 case Intrinsic::loongarch_lsx_vreplvei_w:
3432 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3433 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3434 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3435 case Intrinsic::loongarch_lasx_xvpickve_d:
3436 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3437 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3438 case Intrinsic::loongarch_lasx_xvinsve0_d:
3439 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3440 case Intrinsic::loongarch_lsx_vsat_b:
3441 case Intrinsic::loongarch_lsx_vsat_bu:
3442 case Intrinsic::loongarch_lsx_vrotri_b:
3443 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3444 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3445 case Intrinsic::loongarch_lsx_vsrlri_b:
3446 case Intrinsic::loongarch_lsx_vsrari_b:
3447 case Intrinsic::loongarch_lsx_vreplvei_h:
3448 case Intrinsic::loongarch_lasx_xvsat_b:
3449 case Intrinsic::loongarch_lasx_xvsat_bu:
3450 case Intrinsic::loongarch_lasx_xvrotri_b:
3451 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3452 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3453 case Intrinsic::loongarch_lasx_xvsrlri_b:
3454 case Intrinsic::loongarch_lasx_xvsrari_b:
3455 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3456 case Intrinsic::loongarch_lasx_xvpickve_w:
3457 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3458 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3459 case Intrinsic::loongarch_lasx_xvinsve0_w:
3460 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3461 case Intrinsic::loongarch_lsx_vsat_h:
3462 case Intrinsic::loongarch_lsx_vsat_hu:
3463 case Intrinsic::loongarch_lsx_vrotri_h:
3464 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3465 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3466 case Intrinsic::loongarch_lsx_vsrlri_h:
3467 case Intrinsic::loongarch_lsx_vsrari_h:
3468 case Intrinsic::loongarch_lsx_vreplvei_b:
3469 case Intrinsic::loongarch_lasx_xvsat_h:
3470 case Intrinsic::loongarch_lasx_xvsat_hu:
3471 case Intrinsic::loongarch_lasx_xvrotri_h:
3472 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3473 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3474 case Intrinsic::loongarch_lasx_xvsrlri_h:
3475 case Intrinsic::loongarch_lasx_xvsrari_h:
3476 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3477 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3478 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3479 case Intrinsic::loongarch_lsx_vsrani_b_h:
3480 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3481 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3482 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3483 case Intrinsic::loongarch_lsx_vssrani_b_h:
3484 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3485 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3486 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3487 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3488 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3489 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3490 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3491 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3492 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3493 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3494 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3495 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3496 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3497 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3498 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3499 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3500 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3501 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3502 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3503 case Intrinsic::loongarch_lsx_vsat_w:
3504 case Intrinsic::loongarch_lsx_vsat_wu:
3505 case Intrinsic::loongarch_lsx_vrotri_w:
3506 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3507 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3508 case Intrinsic::loongarch_lsx_vsrlri_w:
3509 case Intrinsic::loongarch_lsx_vsrari_w:
3510 case Intrinsic::loongarch_lsx_vslei_bu:
3511 case Intrinsic::loongarch_lsx_vslei_hu:
3512 case Intrinsic::loongarch_lsx_vslei_wu:
3513 case Intrinsic::loongarch_lsx_vslei_du:
3514 case Intrinsic::loongarch_lsx_vslti_bu:
3515 case Intrinsic::loongarch_lsx_vslti_hu:
3516 case Intrinsic::loongarch_lsx_vslti_wu:
3517 case Intrinsic::loongarch_lsx_vslti_du:
3518 case Intrinsic::loongarch_lsx_vbsll_v:
3519 case Intrinsic::loongarch_lsx_vbsrl_v:
3520 case Intrinsic::loongarch_lasx_xvsat_w:
3521 case Intrinsic::loongarch_lasx_xvsat_wu:
3522 case Intrinsic::loongarch_lasx_xvrotri_w:
3523 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3524 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3525 case Intrinsic::loongarch_lasx_xvsrlri_w:
3526 case Intrinsic::loongarch_lasx_xvsrari_w:
3527 case Intrinsic::loongarch_lasx_xvslei_bu:
3528 case Intrinsic::loongarch_lasx_xvslei_hu:
3529 case Intrinsic::loongarch_lasx_xvslei_wu:
3530 case Intrinsic::loongarch_lasx_xvslei_du:
3531 case Intrinsic::loongarch_lasx_xvslti_bu:
3532 case Intrinsic::loongarch_lasx_xvslti_hu:
3533 case Intrinsic::loongarch_lasx_xvslti_wu:
3534 case Intrinsic::loongarch_lasx_xvslti_du:
3535 case Intrinsic::loongarch_lasx_xvbsll_v:
3536 case Intrinsic::loongarch_lasx_xvbsrl_v:
3537 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3538 case Intrinsic::loongarch_lsx_vseqi_b:
3539 case Intrinsic::loongarch_lsx_vseqi_h:
3540 case Intrinsic::loongarch_lsx_vseqi_w:
3541 case Intrinsic::loongarch_lsx_vseqi_d:
3542 case Intrinsic::loongarch_lsx_vslei_b:
3543 case Intrinsic::loongarch_lsx_vslei_h:
3544 case Intrinsic::loongarch_lsx_vslei_w:
3545 case Intrinsic::loongarch_lsx_vslei_d:
3546 case Intrinsic::loongarch_lsx_vslti_b:
3547 case Intrinsic::loongarch_lsx_vslti_h:
3548 case Intrinsic::loongarch_lsx_vslti_w:
3549 case Intrinsic::loongarch_lsx_vslti_d:
3550 case Intrinsic::loongarch_lasx_xvseqi_b:
3551 case Intrinsic::loongarch_lasx_xvseqi_h:
3552 case Intrinsic::loongarch_lasx_xvseqi_w:
3553 case Intrinsic::loongarch_lasx_xvseqi_d:
3554 case Intrinsic::loongarch_lasx_xvslei_b:
3555 case Intrinsic::loongarch_lasx_xvslei_h:
3556 case Intrinsic::loongarch_lasx_xvslei_w:
3557 case Intrinsic::loongarch_lasx_xvslei_d:
3558 case Intrinsic::loongarch_lasx_xvslti_b:
3559 case Intrinsic::loongarch_lasx_xvslti_h:
3560 case Intrinsic::loongarch_lasx_xvslti_w:
3561 case Intrinsic::loongarch_lasx_xvslti_d:
3562 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3563 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3564 case Intrinsic::loongarch_lsx_vsrani_h_w:
3565 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3566 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3567 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3568 case Intrinsic::loongarch_lsx_vssrani_h_w:
3569 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3570 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3571 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3572 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3573 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3574 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3575 case Intrinsic::loongarch_lsx_vfrstpi_b:
3576 case Intrinsic::loongarch_lsx_vfrstpi_h:
3577 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3578 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3579 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3580 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3581 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3582 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3583 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3584 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3585 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3586 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3587 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3588 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3589 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3590 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3591 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3592 case Intrinsic::loongarch_lsx_vsat_d:
3593 case Intrinsic::loongarch_lsx_vsat_du:
3594 case Intrinsic::loongarch_lsx_vrotri_d:
3595 case Intrinsic::loongarch_lsx_vsrlri_d:
3596 case Intrinsic::loongarch_lsx_vsrari_d:
3597 case Intrinsic::loongarch_lasx_xvsat_d:
3598 case Intrinsic::loongarch_lasx_xvsat_du:
3599 case Intrinsic::loongarch_lasx_xvrotri_d:
3600 case Intrinsic::loongarch_lasx_xvsrlri_d:
3601 case Intrinsic::loongarch_lasx_xvsrari_d:
3602 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3603 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3604 case Intrinsic::loongarch_lsx_vsrani_w_d:
3605 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3606 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3607 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3608 case Intrinsic::loongarch_lsx_vssrani_w_d:
3609 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3610 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3611 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3612 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3613 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3614 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3615 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3616 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3617 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3618 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3619 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3620 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3621 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3622 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3623 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3624 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3625 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3626 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3627 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3628 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3629 case Intrinsic::loongarch_lsx_vsrani_d_q:
3630 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3631 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3632 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3633 case Intrinsic::loongarch_lsx_vssrani_d_q:
3634 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3635 case Intrinsic::loongarch_lsx_vssrani_du_q:
3636 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3637 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3638 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3639 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3640 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3641 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3642 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3643 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3644 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3645 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3646 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3647 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3648 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3649 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3650 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3651 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3652 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3653 case Intrinsic::loongarch_lsx_vnori_b:
3654 case Intrinsic::loongarch_lsx_vshuf4i_b:
3655 case Intrinsic::loongarch_lsx_vshuf4i_h:
3656 case Intrinsic::loongarch_lsx_vshuf4i_w:
3657 case Intrinsic::loongarch_lasx_xvnori_b:
3658 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3659 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3660 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3661 case Intrinsic::loongarch_lasx_xvpermi_d:
3662 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3663 case Intrinsic::loongarch_lsx_vshuf4i_d:
3664 case Intrinsic::loongarch_lsx_vpermi_w:
3665 case Intrinsic::loongarch_lsx_vbitseli_b:
3666 case Intrinsic::loongarch_lsx_vextrins_b:
3667 case Intrinsic::loongarch_lsx_vextrins_h:
3668 case Intrinsic::loongarch_lsx_vextrins_w:
3669 case Intrinsic::loongarch_lsx_vextrins_d:
3670 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3671 case Intrinsic::loongarch_lasx_xvpermi_w:
3672 case Intrinsic::loongarch_lasx_xvpermi_q:
3673 case Intrinsic::loongarch_lasx_xvbitseli_b:
3674 case Intrinsic::loongarch_lasx_xvextrins_b:
3675 case Intrinsic::loongarch_lasx_xvextrins_h:
3676 case Intrinsic::loongarch_lasx_xvextrins_w:
3677 case Intrinsic::loongarch_lasx_xvextrins_d:
3678 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3679 case Intrinsic::loongarch_lsx_vrepli_b:
3680 case Intrinsic::loongarch_lsx_vrepli_h:
3681 case Intrinsic::loongarch_lsx_vrepli_w:
3682 case Intrinsic::loongarch_lsx_vrepli_d:
3683 case Intrinsic::loongarch_lasx_xvrepli_b:
3684 case Intrinsic::loongarch_lasx_xvrepli_h:
3685 case Intrinsic::loongarch_lasx_xvrepli_w:
3686 case Intrinsic::loongarch_lasx_xvrepli_d:
3687 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3688 case Intrinsic::loongarch_lsx_vldi:
3689 case Intrinsic::loongarch_lasx_xvldi:
3690 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3691 }
3692}
3693
3694 // Helper function that emits an error message for intrinsics with a chain and
3695 // returns the merge values of a UNDEF and the chain.
3696 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3697 StringRef ErrorMsg,
3698 SelectionDAG &DAG) {
3699 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3700 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3701 SDLoc(Op));
3702}
3703
3704SDValue
3705LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3706 SelectionDAG &DAG) const {
3707 SDLoc DL(Op);
3708 MVT GRLenVT = Subtarget.getGRLenVT();
3709 EVT VT = Op.getValueType();
3710 SDValue Chain = Op.getOperand(0);
3711 const StringRef ErrorMsgOOR = "argument out of range";
3712 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3713 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3714
3715 switch (Op.getConstantOperandVal(1)) {
3716 default:
3717 return Op;
3718 case Intrinsic::loongarch_crc_w_b_w:
3719 case Intrinsic::loongarch_crc_w_h_w:
3720 case Intrinsic::loongarch_crc_w_w_w:
3721 case Intrinsic::loongarch_crc_w_d_w:
3722 case Intrinsic::loongarch_crcc_w_b_w:
3723 case Intrinsic::loongarch_crcc_w_h_w:
3724 case Intrinsic::loongarch_crcc_w_w_w:
3725 case Intrinsic::loongarch_crcc_w_d_w:
3726 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3727 case Intrinsic::loongarch_csrrd_w:
3728 case Intrinsic::loongarch_csrrd_d: {
3729 unsigned Imm = Op.getConstantOperandVal(2);
3730 return !isUInt<14>(Imm)
3731 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3732 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3733 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3734 }
3735 case Intrinsic::loongarch_csrwr_w:
3736 case Intrinsic::loongarch_csrwr_d: {
3737 unsigned Imm = Op.getConstantOperandVal(3);
3738 return !isUInt<14>(Imm)
3739 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3740 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3741 {Chain, Op.getOperand(2),
3742 DAG.getConstant(Imm, DL, GRLenVT)});
3743 }
3744 case Intrinsic::loongarch_csrxchg_w:
3745 case Intrinsic::loongarch_csrxchg_d: {
3746 unsigned Imm = Op.getConstantOperandVal(4);
3747 return !isUInt<14>(Imm)
3748 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3749 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3750 {Chain, Op.getOperand(2), Op.getOperand(3),
3751 DAG.getConstant(Imm, DL, GRLenVT)});
3752 }
3753 case Intrinsic::loongarch_iocsrrd_d: {
3754 return DAG.getNode(
3755 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3756 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3757 }
3758#define IOCSRRD_CASE(NAME, NODE) \
3759 case Intrinsic::loongarch_##NAME: { \
3760 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3761 {Chain, Op.getOperand(2)}); \
3762 }
3763 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3764 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3765 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3766#undef IOCSRRD_CASE
3767 case Intrinsic::loongarch_cpucfg: {
3768 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3769 {Chain, Op.getOperand(2)});
3770 }
3771 case Intrinsic::loongarch_lddir_d: {
3772 unsigned Imm = Op.getConstantOperandVal(3);
3773 return !isUInt<8>(Imm)
3774 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3775 : Op;
3776 }
3777 case Intrinsic::loongarch_movfcsr2gr: {
3778 if (!Subtarget.hasBasicF())
3779 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3780 unsigned Imm = Op.getConstantOperandVal(2);
3781 return !isUInt<2>(Imm)
3782 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3783 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3784 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3785 }
3786 case Intrinsic::loongarch_lsx_vld:
3787 case Intrinsic::loongarch_lsx_vldrepl_b:
3788 case Intrinsic::loongarch_lasx_xvld:
3789 case Intrinsic::loongarch_lasx_xvldrepl_b:
3790 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3791 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3792 : SDValue();
3793 case Intrinsic::loongarch_lsx_vldrepl_h:
3794 case Intrinsic::loongarch_lasx_xvldrepl_h:
3795 return !isShiftedInt<11, 1>(
3796 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3797              ? emitIntrinsicWithChainErrorMessage(
3798 Op, "argument out of range or not a multiple of 2", DAG)
3799 : SDValue();
3800 case Intrinsic::loongarch_lsx_vldrepl_w:
3801 case Intrinsic::loongarch_lasx_xvldrepl_w:
3802 return !isShiftedInt<10, 2>(
3803 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3804              ? emitIntrinsicWithChainErrorMessage(
3805 Op, "argument out of range or not a multiple of 4", DAG)
3806 : SDValue();
3807 case Intrinsic::loongarch_lsx_vldrepl_d:
3808 case Intrinsic::loongarch_lasx_xvldrepl_d:
3809 return !isShiftedInt<9, 3>(
3810 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3811              ? emitIntrinsicWithChainErrorMessage(
3812 Op, "argument out of range or not a multiple of 8", DAG)
3813 : SDValue();
3814 }
3815}
3816
3817 // Helper function that emits an error message for intrinsics with a void
3818 // return value and returns the chain.
3819 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3820 SelectionDAG &DAG) {
3821
3822 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3823 return Op.getOperand(0);
3824}
3825
3826SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3827 SelectionDAG &DAG) const {
3828 SDLoc DL(Op);
3829 MVT GRLenVT = Subtarget.getGRLenVT();
3830 SDValue Chain = Op.getOperand(0);
3831 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3832 SDValue Op2 = Op.getOperand(2);
3833 const StringRef ErrorMsgOOR = "argument out of range";
3834 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3835 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3836 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3837
3838 switch (IntrinsicEnum) {
3839 default:
3840 // TODO: Add more Intrinsics.
3841 return SDValue();
3842 case Intrinsic::loongarch_cacop_d:
3843 case Intrinsic::loongarch_cacop_w: {
3844 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3845 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3846 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3847 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3848 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3849 unsigned Imm1 = Op2->getAsZExtVal();
3850 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3851 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3852 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3853 return Op;
3854 }
3855 case Intrinsic::loongarch_dbar: {
3856 unsigned Imm = Op2->getAsZExtVal();
3857 return !isUInt<15>(Imm)
3858 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3859 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3860 DAG.getConstant(Imm, DL, GRLenVT));
3861 }
3862 case Intrinsic::loongarch_ibar: {
3863 unsigned Imm = Op2->getAsZExtVal();
3864 return !isUInt<15>(Imm)
3865 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3866 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3867 DAG.getConstant(Imm, DL, GRLenVT));
3868 }
3869 case Intrinsic::loongarch_break: {
3870 unsigned Imm = Op2->getAsZExtVal();
3871 return !isUInt<15>(Imm)
3872 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3873 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3874 DAG.getConstant(Imm, DL, GRLenVT));
3875 }
3876 case Intrinsic::loongarch_movgr2fcsr: {
3877 if (!Subtarget.hasBasicF())
3878 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3879 unsigned Imm = Op2->getAsZExtVal();
3880 return !isUInt<2>(Imm)
3881 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3882 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3883 DAG.getConstant(Imm, DL, GRLenVT),
3884 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3885 Op.getOperand(3)));
3886 }
3887 case Intrinsic::loongarch_syscall: {
3888 unsigned Imm = Op2->getAsZExtVal();
3889 return !isUInt<15>(Imm)
3890 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3891 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3892 DAG.getConstant(Imm, DL, GRLenVT));
3893 }
3894#define IOCSRWR_CASE(NAME, NODE) \
3895 case Intrinsic::loongarch_##NAME: { \
3896 SDValue Op3 = Op.getOperand(3); \
3897 return Subtarget.is64Bit() \
3898 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
3899 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3900 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
3901 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
3902 Op3); \
3903 }
3904 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3905 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3906 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3907#undef IOCSRWR_CASE
3908 case Intrinsic::loongarch_iocsrwr_d: {
3909 return !Subtarget.is64Bit()
3910 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3911 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
3912 Op2,
3913 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3914 Op.getOperand(3)));
3915 }
3916#define ASRT_LE_GT_CASE(NAME) \
3917 case Intrinsic::loongarch_##NAME: { \
3918 return !Subtarget.is64Bit() \
3919 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
3920 : Op; \
3921 }
3922 ASRT_LE_GT_CASE(asrtle_d)
3923 ASRT_LE_GT_CASE(asrtgt_d)
3924#undef ASRT_LE_GT_CASE
3925 case Intrinsic::loongarch_ldpte_d: {
3926 unsigned Imm = Op.getConstantOperandVal(3);
3927 return !Subtarget.is64Bit()
3928 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3929 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3930 : Op;
3931 }
3932 case Intrinsic::loongarch_lsx_vst:
3933 case Intrinsic::loongarch_lasx_xvst:
3934 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
3935 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3936 : SDValue();
3937 case Intrinsic::loongarch_lasx_xvstelm_b:
3938 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3939 !isUInt<5>(Op.getConstantOperandVal(5)))
3940 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3941 : SDValue();
3942 case Intrinsic::loongarch_lsx_vstelm_b:
3943 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3944 !isUInt<4>(Op.getConstantOperandVal(5)))
3945 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3946 : SDValue();
3947 case Intrinsic::loongarch_lasx_xvstelm_h:
3948 return (!isShiftedInt<8, 1>(
3949 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3950 !isUInt<4>(Op.getConstantOperandVal(5)))
3951              ? emitIntrinsicErrorMessage(
3952 Op, "argument out of range or not a multiple of 2", DAG)
3953 : SDValue();
3954 case Intrinsic::loongarch_lsx_vstelm_h:
3955 return (!isShiftedInt<8, 1>(
3956 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3957 !isUInt<3>(Op.getConstantOperandVal(5)))
3958              ? emitIntrinsicErrorMessage(
3959 Op, "argument out of range or not a multiple of 2", DAG)
3960 : SDValue();
3961 case Intrinsic::loongarch_lasx_xvstelm_w:
3962 return (!isShiftedInt<8, 2>(
3963 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3964 !isUInt<3>(Op.getConstantOperandVal(5)))
3965              ? emitIntrinsicErrorMessage(
3966 Op, "argument out of range or not a multiple of 4", DAG)
3967 : SDValue();
3968 case Intrinsic::loongarch_lsx_vstelm_w:
3969 return (!isShiftedInt<8, 2>(
3970 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3971 !isUInt<2>(Op.getConstantOperandVal(5)))
3972              ? emitIntrinsicErrorMessage(
3973 Op, "argument out of range or not a multiple of 4", DAG)
3974 : SDValue();
3975 case Intrinsic::loongarch_lasx_xvstelm_d:
3976 return (!isShiftedInt<8, 3>(
3977 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3978 !isUInt<2>(Op.getConstantOperandVal(5)))
3979              ? emitIntrinsicErrorMessage(
3980 Op, "argument out of range or not a multiple of 8", DAG)
3981 : SDValue();
3982 case Intrinsic::loongarch_lsx_vstelm_d:
3983 return (!isShiftedInt<8, 3>(
3984 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3985 !isUInt<1>(Op.getConstantOperandVal(5)))
3986              ? emitIntrinsicErrorMessage(
3987 Op, "argument out of range or not a multiple of 8", DAG)
3988 : SDValue();
3989 }
3990}
3991
3992SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
3993 SelectionDAG &DAG) const {
3994 SDLoc DL(Op);
3995 SDValue Lo = Op.getOperand(0);
3996 SDValue Hi = Op.getOperand(1);
3997 SDValue Shamt = Op.getOperand(2);
3998 EVT VT = Lo.getValueType();
3999
4000 // if Shamt-GRLen < 0: // Shamt < GRLen
4001 // Lo = Lo << Shamt
4002 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4003 // else:
4004 // Lo = 0
4005 // Hi = Lo << (Shamt-GRLen)
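   // Illustrative example with GRLen = 32: for Shamt = 4, Lo = Lo << 4 and
   // Hi = (Hi << 4) | (Lo >>u 28); for Shamt = 40, Shamt-GRLen = 8 >= 0, so
   // Lo = 0 and Hi = Lo << 8.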
4006
4007 SDValue Zero = DAG.getConstant(0, DL, VT);
4008 SDValue One = DAG.getConstant(1, DL, VT);
4009 SDValue MinusGRLen =
4010 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4011 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4012 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4013 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4014
4015 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4016 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4017 SDValue ShiftRightLo =
4018 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4019 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4020 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4021 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4022
4023 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4024
4025 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4026 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4027
4028 SDValue Parts[2] = {Lo, Hi};
4029 return DAG.getMergeValues(Parts, DL);
4030}
4031
4032SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4033 SelectionDAG &DAG,
4034 bool IsSRA) const {
4035 SDLoc DL(Op);
4036 SDValue Lo = Op.getOperand(0);
4037 SDValue Hi = Op.getOperand(1);
4038 SDValue Shamt = Op.getOperand(2);
4039 EVT VT = Lo.getValueType();
4040
4041 // SRA expansion:
4042 // if Shamt-GRLen < 0: // Shamt < GRLen
4043 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4044 // Hi = Hi >>s Shamt
4045 // else:
4046 // Lo = Hi >>s (Shamt-GRLen);
4047 // Hi = Hi >>s (GRLen-1)
4048 //
4049 // SRL expansion:
4050 // if Shamt-GRLen < 0: // Shamt < GRLen
4051 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4052 // Hi = Hi >>u Shamt
4053 // else:
4054 // Lo = Hi >>u (Shamt-GRLen);
4055 // Hi = 0;
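   // Illustrative SRA example with GRLen = 32 and Shamt = 40: Lo = Hi >>s 8 and
   // Hi = Hi >>s 31, i.e. Hi becomes the replicated sign bit.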
4056
4057 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4058
4059 SDValue Zero = DAG.getConstant(0, DL, VT);
4060 SDValue One = DAG.getConstant(1, DL, VT);
4061 SDValue MinusGRLen =
4062 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4063 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4064 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4065 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4066
4067 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4068 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4069 SDValue ShiftLeftHi =
4070 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4071 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4072 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4073 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4074 SDValue HiFalse =
4075 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4076
4077 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4078
4079 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4080 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4081
4082 SDValue Parts[2] = {Lo, Hi};
4083 return DAG.getMergeValues(Parts, DL);
4084}
4085
4086// Returns the opcode of the target-specific SDNode that implements the 32-bit
4087// form of the given Opcode.
4088 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4089 switch (Opcode) {
4090 default:
4091 llvm_unreachable("Unexpected opcode");
4092 case ISD::SDIV:
4093 return LoongArchISD::DIV_W;
4094 case ISD::UDIV:
4095 return LoongArchISD::DIV_WU;
4096 case ISD::SREM:
4097 return LoongArchISD::MOD_W;
4098 case ISD::UREM:
4099 return LoongArchISD::MOD_WU;
4100 case ISD::SHL:
4101 return LoongArchISD::SLL_W;
4102 case ISD::SRA:
4103 return LoongArchISD::SRA_W;
4104 case ISD::SRL:
4105 return LoongArchISD::SRL_W;
4106 case ISD::ROTL:
4107 case ISD::ROTR:
4108 return LoongArchISD::ROTR_W;
4109 case ISD::CTTZ:
4110 return LoongArchISD::CTZ_W;
4111 case ISD::CTLZ:
4112 return LoongArchISD::CLZ_W;
4113 }
4114}
4115
4116// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4117// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4118// otherwise be promoted to i64, making it difficult to select the
4119 // SLL_W/.../*W instructions later, because the fact that the operation was
4120 // originally of type i8/i16/i32 is lost.
4121 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4122 unsigned ExtOpc = ISD::ANY_EXTEND) {
4123 SDLoc DL(N);
4124 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4125 SDValue NewOp0, NewRes;
4126
4127 switch (NumOp) {
4128 default:
4129 llvm_unreachable("Unexpected NumOp");
4130 case 1: {
4131 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4132 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4133 break;
4134 }
4135 case 2: {
4136 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4137 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4138 if (N->getOpcode() == ISD::ROTL) {
4139 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4140 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4141 }
4142 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4143 break;
4144 }
4145   // TODO: Handle more NumOp.
4146 }
4147
4148 // ReplaceNodeResults requires we maintain the same type for the return
4149 // value.
4150 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4151}
4152
4153 // Converts the given 32-bit operation to an i64 operation with sign-extension
4154 // semantics to reduce the number of sign-extension instructions.
4155 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4156 SDLoc DL(N);
4157 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4158 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4159 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4160 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4161 DAG.getValueType(MVT::i32));
4162 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4163}
4164
4165 // Helper function that emits an error message for intrinsics with or without a
4166 // chain, and returns a UNDEF and (when requested) the chain as the results.
4167 static void emitErrorAndReplaceIntrinsicResults(
4168     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4169 StringRef ErrorMsg, bool WithChain = true) {
4170 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4171 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4172 if (!WithChain)
4173 return;
4174 Results.push_back(N->getOperand(0));
4175}
4176
4177template <unsigned N>
4178static void
4179 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4180 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4181 unsigned ResOp) {
4182 const StringRef ErrorMsgOOR = "argument out of range";
4183 unsigned Imm = Node->getConstantOperandVal(2);
4184 if (!isUInt<N>(Imm)) {
4185     emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4186 /*WithChain=*/false);
4187 return;
4188 }
4189 SDLoc DL(Node);
4190 SDValue Vec = Node->getOperand(1);
4191
4192 SDValue PickElt =
4193 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4194 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4195                   DAG.getValueType(Vec.getValueType().getVectorElementType()));
4196 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4197 PickElt.getValue(0)));
4198}
4199
4200 static void replaceVecCondBranchResults(SDNode *N,
4201                                         SmallVectorImpl<SDValue> &Results,
4202 SelectionDAG &DAG,
4203 const LoongArchSubtarget &Subtarget,
4204 unsigned ResOp) {
4205 SDLoc DL(N);
4206 SDValue Vec = N->getOperand(1);
4207
4208 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4209 Results.push_back(
4210 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4211}
4212
4213static void
4214 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4215 SelectionDAG &DAG,
4216 const LoongArchSubtarget &Subtarget) {
4217 switch (N->getConstantOperandVal(0)) {
4218 default:
4219 llvm_unreachable("Unexpected Intrinsic.");
4220 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4221 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4222                                 LoongArchISD::VPICK_SEXT_ELT);
4223 break;
4224 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4225 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4226 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4227                                 LoongArchISD::VPICK_SEXT_ELT);
4228 break;
4229 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4230 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4231                                 LoongArchISD::VPICK_SEXT_ELT);
4232 break;
4233 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4234 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4235                                 LoongArchISD::VPICK_ZEXT_ELT);
4236 break;
4237 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4238 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4239 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4240                                 LoongArchISD::VPICK_ZEXT_ELT);
4241 break;
4242 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4243 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4244                                 LoongArchISD::VPICK_ZEXT_ELT);
4245 break;
4246 case Intrinsic::loongarch_lsx_bz_b:
4247 case Intrinsic::loongarch_lsx_bz_h:
4248 case Intrinsic::loongarch_lsx_bz_w:
4249 case Intrinsic::loongarch_lsx_bz_d:
4250 case Intrinsic::loongarch_lasx_xbz_b:
4251 case Intrinsic::loongarch_lasx_xbz_h:
4252 case Intrinsic::loongarch_lasx_xbz_w:
4253 case Intrinsic::loongarch_lasx_xbz_d:
4254 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4255                                 LoongArchISD::VALL_ZERO);
4256 break;
4257 case Intrinsic::loongarch_lsx_bz_v:
4258 case Intrinsic::loongarch_lasx_xbz_v:
4259 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4260                                 LoongArchISD::VANY_ZERO);
4261 break;
4262 case Intrinsic::loongarch_lsx_bnz_b:
4263 case Intrinsic::loongarch_lsx_bnz_h:
4264 case Intrinsic::loongarch_lsx_bnz_w:
4265 case Intrinsic::loongarch_lsx_bnz_d:
4266 case Intrinsic::loongarch_lasx_xbnz_b:
4267 case Intrinsic::loongarch_lasx_xbnz_h:
4268 case Intrinsic::loongarch_lasx_xbnz_w:
4269 case Intrinsic::loongarch_lasx_xbnz_d:
4270 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4271                                 LoongArchISD::VALL_NONZERO);
4272 break;
4273 case Intrinsic::loongarch_lsx_bnz_v:
4274 case Intrinsic::loongarch_lasx_xbnz_v:
4275 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4276                                 LoongArchISD::VANY_NONZERO);
4277 break;
4278 }
4279}
4280
4281 static void replaceCMP_XCHG_128Results(SDNode *N,
4282                                        SmallVectorImpl<SDValue> &Results,
4283 SelectionDAG &DAG) {
4284 assert(N->getValueType(0) == MVT::i128 &&
4285 "AtomicCmpSwap on types less than 128 should be legal");
4286 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4287
4288 unsigned Opcode;
4289 switch (MemOp->getMergedOrdering()) {
4290   case AtomicOrdering::Acquire:
4291   case AtomicOrdering::AcquireRelease:
4292   case AtomicOrdering::SequentiallyConsistent:
4293 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4294 break;
4295   case AtomicOrdering::Monotonic:
4296   case AtomicOrdering::Release:
4297 Opcode = LoongArch::PseudoCmpXchg128;
4298 break;
4299 default:
4300 llvm_unreachable("Unexpected ordering!");
4301 }
4302
4303 SDLoc DL(N);
4304 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4305 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4306 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4307 NewVal.first, NewVal.second, N->getOperand(0)};
4308
4309 SDNode *CmpSwap = DAG.getMachineNode(
4310 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4311 Ops);
4312 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4313 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4314 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4315 Results.push_back(SDValue(CmpSwap, 3));
4316}
4317
4318 void LoongArchTargetLowering::ReplaceNodeResults(
4319     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4320 SDLoc DL(N);
4321 EVT VT = N->getValueType(0);
4322 switch (N->getOpcode()) {
4323 default:
4324 llvm_unreachable("Don't know how to legalize this operation");
4325 case ISD::ADD:
4326 case ISD::SUB:
4327 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4328 "Unexpected custom legalisation");
4329 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4330 break;
4331 case ISD::SDIV:
4332 case ISD::UDIV:
4333 case ISD::SREM:
4334 case ISD::UREM:
4335 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4336 "Unexpected custom legalisation");
4337 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4338 Subtarget.hasDiv32() && VT == MVT::i32
4339                                               ? ISD::ANY_EXTEND
4340 : ISD::SIGN_EXTEND));
4341 break;
4342 case ISD::SHL:
4343 case ISD::SRA:
4344 case ISD::SRL:
4345 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4346 "Unexpected custom legalisation");
4347 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4348 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4349 break;
4350 }
4351 break;
4352 case ISD::ROTL:
4353 case ISD::ROTR:
4354 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4355 "Unexpected custom legalisation");
4356 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4357 break;
4358 case ISD::FP_TO_SINT: {
4359 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4360 "Unexpected custom legalisation");
4361 SDValue Src = N->getOperand(0);
4362 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4363 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4364         TargetLowering::TypeSoftenFloat) {
4365 if (!isTypeLegal(Src.getValueType()))
4366 return;
4367 if (Src.getValueType() == MVT::f16)
4368 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4369 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4370 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4371 return;
4372 }
4373 // If the FP type needs to be softened, emit a library call using the 'si'
4374 // version. If we left it to default legalization we'd end up with 'di'.
4375 RTLIB::Libcall LC;
4376 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4377 MakeLibCallOptions CallOptions;
4378 EVT OpVT = Src.getValueType();
4379 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4380 SDValue Chain = SDValue();
4381 SDValue Result;
4382 std::tie(Result, Chain) =
4383 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4384 Results.push_back(Result);
4385 break;
4386 }
4387 case ISD::BITCAST: {
4388 SDValue Src = N->getOperand(0);
4389 EVT SrcVT = Src.getValueType();
4390 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4391 Subtarget.hasBasicF()) {
4392 SDValue Dst =
4393 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4394 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4395 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4397 DAG.getVTList(MVT::i32, MVT::i32), Src);
4398 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4399 NewReg.getValue(0), NewReg.getValue(1));
4400 Results.push_back(RetReg);
4401 }
4402 break;
4403 }
4404 case ISD::FP_TO_UINT: {
4405 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4406 "Unexpected custom legalisation");
4407 auto &TLI = DAG.getTargetLoweringInfo();
4408 SDValue Tmp1, Tmp2;
4409 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4410 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4411 break;
4412 }
4413 case ISD::BSWAP: {
4414 SDValue Src = N->getOperand(0);
4415 assert((VT == MVT::i16 || VT == MVT::i32) &&
4416 "Unexpected custom legalization");
4417 MVT GRLenVT = Subtarget.getGRLenVT();
4418 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4419 SDValue Tmp;
4420 switch (VT.getSizeInBits()) {
4421 default:
4422 llvm_unreachable("Unexpected operand width");
4423 case 16:
4424 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4425 break;
4426 case 32:
4427       // Only LA64 will get to here due to the size mismatch between VT and
4428       // GRLenVT; LA32 lowering is directly defined in LoongArchInstrInfo.
4429 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4430 break;
4431 }
4432 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4433 break;
4434 }
4435 case ISD::BITREVERSE: {
4436 SDValue Src = N->getOperand(0);
4437 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4438 "Unexpected custom legalization");
4439 MVT GRLenVT = Subtarget.getGRLenVT();
4440 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4441 SDValue Tmp;
4442 switch (VT.getSizeInBits()) {
4443 default:
4444 llvm_unreachable("Unexpected operand width");
4445 case 8:
4446 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4447 break;
4448 case 32:
4449 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4450 break;
4451 }
4452 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4453 break;
4454 }
4455 case ISD::CTLZ:
4456 case ISD::CTTZ: {
4457 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4458 "Unexpected custom legalisation");
4459 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4460 break;
4461 }
4462   case ISD::INTRINSIC_W_CHAIN: {
4463 SDValue Chain = N->getOperand(0);
4464 SDValue Op2 = N->getOperand(2);
4465 MVT GRLenVT = Subtarget.getGRLenVT();
4466 const StringRef ErrorMsgOOR = "argument out of range";
4467 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4468 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4469
4470 switch (N->getConstantOperandVal(1)) {
4471 default:
4472 llvm_unreachable("Unexpected Intrinsic.");
4473 case Intrinsic::loongarch_movfcsr2gr: {
4474 if (!Subtarget.hasBasicF()) {
4475 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4476 return;
4477 }
4478 unsigned Imm = Op2->getAsZExtVal();
4479 if (!isUInt<2>(Imm)) {
4480 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4481 return;
4482 }
4483 SDValue MOVFCSR2GRResults = DAG.getNode(
4484 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4485 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4486 Results.push_back(
4487 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4488 Results.push_back(MOVFCSR2GRResults.getValue(1));
4489 break;
4490 }
4491#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4492 case Intrinsic::loongarch_##NAME: { \
4493 SDValue NODE = DAG.getNode( \
4494 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4495 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4496 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4497 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4498 Results.push_back(NODE.getValue(1)); \
4499 break; \
4500 }
4501 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4502 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4503 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4504 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4505 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4506 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4507#undef CRC_CASE_EXT_BINARYOP
4508
4509#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4510 case Intrinsic::loongarch_##NAME: { \
4511 SDValue NODE = DAG.getNode( \
4512 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4513 {Chain, Op2, \
4514 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4515 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4516 Results.push_back(NODE.getValue(1)); \
4517 break; \
4518 }
4519 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4520 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4521#undef CRC_CASE_EXT_UNARYOP
4522#define CSR_CASE(ID) \
4523 case Intrinsic::loongarch_##ID: { \
4524 if (!Subtarget.is64Bit()) \
4525 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4526 break; \
4527 }
4528 CSR_CASE(csrrd_d);
4529 CSR_CASE(csrwr_d);
4530 CSR_CASE(csrxchg_d);
4531 CSR_CASE(iocsrrd_d);
4532#undef CSR_CASE
4533 case Intrinsic::loongarch_csrrd_w: {
4534 unsigned Imm = Op2->getAsZExtVal();
4535 if (!isUInt<14>(Imm)) {
4536 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4537 return;
4538 }
4539 SDValue CSRRDResults =
4540 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4541 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4542 Results.push_back(
4543 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4544 Results.push_back(CSRRDResults.getValue(1));
4545 break;
4546 }
4547 case Intrinsic::loongarch_csrwr_w: {
4548 unsigned Imm = N->getConstantOperandVal(3);
4549 if (!isUInt<14>(Imm)) {
4550 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4551 return;
4552 }
4553 SDValue CSRWRResults =
4554 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4555 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4556 DAG.getConstant(Imm, DL, GRLenVT)});
4557 Results.push_back(
4558 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4559 Results.push_back(CSRWRResults.getValue(1));
4560 break;
4561 }
4562 case Intrinsic::loongarch_csrxchg_w: {
4563 unsigned Imm = N->getConstantOperandVal(4);
4564 if (!isUInt<14>(Imm)) {
4565 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4566 return;
4567 }
4568 SDValue CSRXCHGResults = DAG.getNode(
4569 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4570 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4571 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4572 DAG.getConstant(Imm, DL, GRLenVT)});
4573 Results.push_back(
4574 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4575 Results.push_back(CSRXCHGResults.getValue(1));
4576 break;
4577 }
4578#define IOCSRRD_CASE(NAME, NODE) \
4579 case Intrinsic::loongarch_##NAME: { \
4580 SDValue IOCSRRDResults = \
4581 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4582 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4583 Results.push_back( \
4584 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4585 Results.push_back(IOCSRRDResults.getValue(1)); \
4586 break; \
4587 }
4588 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4589 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4590 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4591#undef IOCSRRD_CASE
4592 case Intrinsic::loongarch_cpucfg: {
4593 SDValue CPUCFGResults =
4594 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4595 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4596 Results.push_back(
4597 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4598 Results.push_back(CPUCFGResults.getValue(1));
4599 break;
4600 }
4601 case Intrinsic::loongarch_lddir_d: {
4602 if (!Subtarget.is64Bit()) {
4603 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4604 return;
4605 }
4606 break;
4607 }
4608 }
4609 break;
4610 }
4611 case ISD::READ_REGISTER: {
4612 if (Subtarget.is64Bit())
4613 DAG.getContext()->emitError(
4614 "On LA64, only 64-bit registers can be read.");
4615 else
4616 DAG.getContext()->emitError(
4617 "On LA32, only 32-bit registers can be read.");
4618 Results.push_back(DAG.getUNDEF(VT));
4619 Results.push_back(N->getOperand(0));
4620 break;
4621 }
4622   case ISD::INTRINSIC_WO_CHAIN: {
4623 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4624 break;
4625 }
4626 case ISD::LROUND: {
4627 SDValue Op0 = N->getOperand(0);
4628 EVT OpVT = Op0.getValueType();
4629 RTLIB::Libcall LC =
4630 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4631 MakeLibCallOptions CallOptions;
4632 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4633 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4634 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4635 Results.push_back(Result);
4636 break;
4637 }
4638 case ISD::ATOMIC_CMP_SWAP: {
4639     replaceCMP_XCHG_128Results(N, Results, DAG);
4640 break;
4641 }
4642 case ISD::TRUNCATE: {
4643 MVT VT = N->getSimpleValueType(0);
4644 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4645 return;
4646
4647 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4648 SDValue In = N->getOperand(0);
4649 EVT InVT = In.getValueType();
4650 EVT InEltVT = InVT.getVectorElementType();
4651 EVT EltVT = VT.getVectorElementType();
4652 unsigned MinElts = VT.getVectorNumElements();
4653 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4654 unsigned InBits = InVT.getSizeInBits();
4655
4656 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4657 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4658 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4659 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4660 for (unsigned I = 0; I < MinElts; ++I)
4661 TruncMask[I] = Scale * I;
4662
4663 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4664 MVT SVT = In.getSimpleValueType().getScalarType();
4665 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4666 SDValue WidenIn =
4667 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4668 DAG.getVectorIdxConstant(0, DL));
4669 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4670 "Illegal vector type in truncation");
4671 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4672 Results.push_back(
4673 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4674 return;
4675 }
4676 }
4677
4678 break;
4679 }
4680 }
4681}
4682
4683 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI,
4685 const LoongArchSubtarget &Subtarget) {
4686 if (DCI.isBeforeLegalizeOps())
4687 return SDValue();
4688
4689 SDValue FirstOperand = N->getOperand(0);
4690 SDValue SecondOperand = N->getOperand(1);
4691 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4692 EVT ValTy = N->getValueType(0);
4693 SDLoc DL(N);
4694 uint64_t lsb, msb;
4695 unsigned SMIdx, SMLen;
4696 ConstantSDNode *CN;
4697 SDValue NewOperand;
4698 MVT GRLenVT = Subtarget.getGRLenVT();
4699
4700 // BSTRPICK requires the 32S feature.
4701 if (!Subtarget.has32S())
4702 return SDValue();
4703
4704 // Op's second operand must be a shifted mask.
4705 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4706 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4707 return SDValue();
4708
4709 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4710 // Pattern match BSTRPICK.
4711 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4712 // => BSTRPICK $dst, $src, msb, lsb
4713 // where msb = lsb + len - 1
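     // Illustrative example: $dst = and (srl $src, 8), 255 becomes
     // BSTRPICK $dst, $src, 15, 8 (lsb = 8, len = 8, msb = 15).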
4714
4715 // The second operand of the shift must be an immediate.
4716 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4717 return SDValue();
4718
4719 lsb = CN->getZExtValue();
4720
4721 // Return if the shifted mask does not start at bit 0 or the sum of its
4722 // length and lsb exceeds the word's size.
4723 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4724 return SDValue();
4725
4726 NewOperand = FirstOperand.getOperand(0);
4727 } else {
4728 // Pattern match BSTRPICK.
4729     // $dst = and $src, (2**len - 1), if len > 12
4730 // => BSTRPICK $dst, $src, msb, lsb
4731 // where lsb = 0 and msb = len - 1
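     // Illustrative example: $dst = and $src, 0xffff becomes
     // BSTRPICK $dst, $src, 15, 0, since len = 16 > 12 cannot be encoded by a
     // single andi.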
4732
4733 // If the mask is <= 0xfff, andi can be used instead.
4734 if (CN->getZExtValue() <= 0xfff)
4735 return SDValue();
4736
4737     // Return if the mask's MSB exceeds the value's bit width.
4738 if (SMIdx + SMLen > ValTy.getSizeInBits())
4739 return SDValue();
4740
4741 if (SMIdx > 0) {
4742       // Omit if the constant has more than 2 uses. This is a conservative
4743 // decision. Whether it is a win depends on the HW microarchitecture.
4744 // However it should always be better for 1 and 2 uses.
4745 if (CN->use_size() > 2)
4746 return SDValue();
4747 // Return if the constant can be composed by a single LU12I.W.
4748 if ((CN->getZExtValue() & 0xfff) == 0)
4749 return SDValue();
4750       // Return if the constant can be composed by a single ADDI with
4751 // the zero register.
4752 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4753 return SDValue();
4754 }
4755
4756 lsb = SMIdx;
4757 NewOperand = FirstOperand;
4758 }
4759
4760 msb = lsb + SMLen - 1;
4761 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4762 DAG.getConstant(msb, DL, GRLenVT),
4763 DAG.getConstant(lsb, DL, GRLenVT));
4764 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4765 return NR0;
4766 // Try to optimize to
4767 // bstrpick $Rd, $Rs, msb, lsb
4768 // slli $Rd, $Rd, lsb
4769 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4770 DAG.getConstant(lsb, DL, GRLenVT));
4771}
4772
4773 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4774                                  TargetLowering::DAGCombinerInfo &DCI,
4775 const LoongArchSubtarget &Subtarget) {
4776 // BSTRPICK requires the 32S feature.
4777 if (!Subtarget.has32S())
4778 return SDValue();
4779
4780 if (DCI.isBeforeLegalizeOps())
4781 return SDValue();
4782
4783 // $dst = srl (and $src, Mask), Shamt
4784 // =>
4785 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4786 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
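   // Illustrative example: $dst = srl (and $src, 0xff00), 8 becomes
   // BSTRPICK $dst, $src, 15, 8 (MaskIdx = 8, MaskLen = 8, Shamt = 8).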
4787 //
4788
4789 SDValue FirstOperand = N->getOperand(0);
4790 ConstantSDNode *CN;
4791 EVT ValTy = N->getValueType(0);
4792 SDLoc DL(N);
4793 MVT GRLenVT = Subtarget.getGRLenVT();
4794 unsigned MaskIdx, MaskLen;
4795 uint64_t Shamt;
4796
4797 // The first operand must be an AND and the second operand of the AND must be
4798 // a shifted mask.
4799 if (FirstOperand.getOpcode() != ISD::AND ||
4800 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4801 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4802 return SDValue();
4803
4804 // The second operand (shift amount) must be an immediate.
4805 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4806 return SDValue();
4807
4808 Shamt = CN->getZExtValue();
4809 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4810 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4811 FirstOperand->getOperand(0),
4812 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4813 DAG.getConstant(Shamt, DL, GRLenVT));
4814
4815 return SDValue();
4816}
4817
4818// Helper to peek through bitops/trunc/setcc to determine size of source vector.
4819// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
4820static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4821 unsigned Depth) {
4822 // Limit recursion.
4823   if (Depth >= SelectionDAG::MaxRecursionDepth)
4824 return false;
4825 switch (Src.getOpcode()) {
4826 case ISD::SETCC:
4827 case ISD::TRUNCATE:
4828 return Src.getOperand(0).getValueSizeInBits() == Size;
4829 case ISD::FREEZE:
4830 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4831 case ISD::AND:
4832 case ISD::XOR:
4833 case ISD::OR:
4834 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4835 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4836 case ISD::SELECT:
4837 case ISD::VSELECT:
4838 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4839 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4840 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4841 case ISD::BUILD_VECTOR:
4842 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4843 ISD::isBuildVectorAllOnes(Src.getNode());
4844 }
4845 return false;
4846}
4847
4848// Helper to push sign extension of vXi1 SETCC result through bitops.
4849 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4850 SDValue Src, const SDLoc &DL) {
4851 switch (Src.getOpcode()) {
4852 case ISD::SETCC:
4853 case ISD::FREEZE:
4854 case ISD::TRUNCATE:
4855 case ISD::BUILD_VECTOR:
4856 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4857 case ISD::AND:
4858 case ISD::XOR:
4859 case ISD::OR:
4860 return DAG.getNode(
4861 Src.getOpcode(), DL, SExtVT,
4862 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4863 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4864 case ISD::SELECT:
4865 case ISD::VSELECT:
4866 return DAG.getSelect(
4867 DL, SExtVT, Src.getOperand(0),
4868 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4869 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4870 }
4871 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4872}
4873
4874static SDValue
4875 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4876                             TargetLowering::DAGCombinerInfo &DCI,
4877 const LoongArchSubtarget &Subtarget) {
4878 SDLoc DL(N);
4879 EVT VT = N->getValueType(0);
4880 SDValue Src = N->getOperand(0);
4881 EVT SrcVT = Src.getValueType();
4882
4883 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4884 return SDValue();
4885
4886 bool UseLASX;
4887 unsigned Opc = ISD::DELETED_NODE;
4888 EVT CmpVT = Src.getOperand(0).getValueType();
4889 EVT EltVT = CmpVT.getVectorElementType();
4890
4891 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4892 UseLASX = false;
4893 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4894 CmpVT.getSizeInBits() == 256)
4895 UseLASX = true;
4896 else
4897 return SDValue();
4898
4899 SDValue SrcN1 = Src.getOperand(1);
4900 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
4901 default:
4902 break;
4903 case ISD::SETEQ:
4904 // x == 0 => not (vmsknez.b x)
4905 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4906       Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
4907 break;
4908 case ISD::SETGT:
4909 // x > -1 => vmskgez.b x
4910 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
4911       Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4912 break;
4913 case ISD::SETGE:
4914 // x >= 0 => vmskgez.b x
4915 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4916       Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4917 break;
4918 case ISD::SETLT:
4919 // x < 0 => vmskltz.{b,h,w,d} x
4920 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
4921 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4922 EltVT == MVT::i64))
4923       Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4924 break;
4925 case ISD::SETLE:
4926 // x <= -1 => vmskltz.{b,h,w,d} x
4927 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
4928 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4929 EltVT == MVT::i64))
4930       Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4931 break;
4932 case ISD::SETNE:
4933 // x != 0 => vmsknez.b x
4934 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4935       Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
4936 break;
4937 }
4938
4939 if (Opc == ISD::DELETED_NODE)
4940 return SDValue();
4941
4942 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
4943   EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4944 V = DAG.getZExtOrTrunc(V, DL, T);
4945 return DAG.getBitcast(VT, V);
4946}
4947
4948 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
4949                                      TargetLowering::DAGCombinerInfo &DCI,
4950 const LoongArchSubtarget &Subtarget) {
4951 SDLoc DL(N);
4952 EVT VT = N->getValueType(0);
4953 SDValue Src = N->getOperand(0);
4954 EVT SrcVT = Src.getValueType();
4955
4956 if (!DCI.isBeforeLegalizeOps())
4957 return SDValue();
4958
4959 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
4960 return SDValue();
4961
4962 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
4963 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
4964 if (Res)
4965 return Res;
4966
4967 // Generate vXi1 using [X]VMSKLTZ
4968 MVT SExtVT;
4969 unsigned Opc;
4970 bool UseLASX = false;
4971 bool PropagateSExt = false;
4972
4973 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
4974 EVT CmpVT = Src.getOperand(0).getValueType();
4975 if (CmpVT.getSizeInBits() > 256)
4976 return SDValue();
4977 }
4978
4979 switch (SrcVT.getSimpleVT().SimpleTy) {
4980 default:
4981 return SDValue();
4982 case MVT::v2i1:
4983 SExtVT = MVT::v2i64;
4984 break;
4985 case MVT::v4i1:
4986 SExtVT = MVT::v4i32;
4987 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4988 SExtVT = MVT::v4i64;
4989 UseLASX = true;
4990 PropagateSExt = true;
4991 }
4992 break;
4993 case MVT::v8i1:
4994 SExtVT = MVT::v8i16;
4995 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4996 SExtVT = MVT::v8i32;
4997 UseLASX = true;
4998 PropagateSExt = true;
4999 }
5000 break;
5001 case MVT::v16i1:
5002 SExtVT = MVT::v16i8;
5003 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5004 SExtVT = MVT::v16i16;
5005 UseLASX = true;
5006 PropagateSExt = true;
5007 }
5008 break;
5009 case MVT::v32i1:
5010 SExtVT = MVT::v32i8;
5011 UseLASX = true;
5012 break;
5013 };
5014 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5015 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5016
5017 SDValue V;
5018 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5019 if (Src.getSimpleValueType() == MVT::v32i8) {
5020 SDValue Lo, Hi;
5021 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5022 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
5023 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
5024 Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
5025 DAG.getConstant(16, DL, MVT::i8));
5026 V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
5027 } else if (UseLASX) {
5028 return SDValue();
5029 }
5030 }
5031
5032 if (!V) {
5033    Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5034 V = DAG.getNode(Opc, DL, MVT::i64, Src);
5035 }
5036
5037  EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5038 V = DAG.getZExtOrTrunc(V, DL, T);
5039 return DAG.getBitcast(VT, V);
5040}
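// Annotation (illustrative sketch, not part of the upstream source): on an
// LSX-only target a v32i1 -> i32 bitcast is handled above by splitting the
// sign-extended v32i8 source into two 128-bit halves, roughly:
//   Lo = VMSKLTZ(Src[0..15]);  Hi = VMSKLTZ(Src[16..31]);
//   Result = Lo | (Hi << 16)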
5041
5042static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5043                                TargetLowering::DAGCombinerInfo &DCI,
5044                                const LoongArchSubtarget &Subtarget) {
5045 MVT GRLenVT = Subtarget.getGRLenVT();
5046 EVT ValTy = N->getValueType(0);
5047 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5048 ConstantSDNode *CN0, *CN1;
5049 SDLoc DL(N);
5050 unsigned ValBits = ValTy.getSizeInBits();
5051 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5052 unsigned Shamt;
5053 bool SwapAndRetried = false;
5054
5055 // BSTRPICK requires the 32S feature.
5056 if (!Subtarget.has32S())
5057 return SDValue();
5058
5059 if (DCI.isBeforeLegalizeOps())
5060 return SDValue();
5061
5062 if (ValBits != 32 && ValBits != 64)
5063 return SDValue();
5064
5065Retry:
5066 // 1st pattern to match BSTRINS:
5067 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5068 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5069 // =>
5070 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
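 // Illustrative example (annotation, assuming ValBits = 32, lsb = 8, size = 8):
 //   R = (X & 0xFFFF00FF) | ((Y << 8) & 0x0000FF00)
 //   => R = BSTRINS X, Y, 15, 8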
5071 if (N0.getOpcode() == ISD::AND &&
5072 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5073 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5074 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5075 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5076 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5077 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5078 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5079 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5080 (MaskIdx0 + MaskLen0 <= ValBits)) {
5081 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5082 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5083 N1.getOperand(0).getOperand(0),
5084 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5085 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5086 }
5087
5088 // 2nd pattern to match BSTRINS:
5089 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5090 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5091 // =>
5092 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
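 // Illustrative example (annotation, assuming ValBits = 32, lsb = 8, size = 8):
 //   R = (X & 0xFFFF00FF) | ((Y & 0xFF) << 8)
 //   => R = BSTRINS X, Y, 15, 8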
5093 if (N0.getOpcode() == ISD::AND &&
5094 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5095 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5096 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5097 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5098 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5099 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5100 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5101 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5102 (MaskIdx0 + MaskLen0 <= ValBits)) {
5103 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5104 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5105 N1.getOperand(0).getOperand(0),
5106 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5107 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5108 }
5109
5110 // 3rd pattern to match BSTRINS:
5111 // R = or (and X, mask0), (and Y, mask1)
5112 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5113 // =>
5114 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5115 // where msb = lsb + size - 1
5116 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5117 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5118 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5119 (MaskIdx0 + MaskLen0 <= 64) &&
5120 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5121 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5122 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5123 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5124 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5125 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5126 DAG.getConstant(ValBits == 32
5127 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5128 : (MaskIdx0 + MaskLen0 - 1),
5129 DL, GRLenVT),
5130 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5131 }
5132
5133 // 4th pattern to match BSTRINS:
5134 // R = or (and X, mask), (shl Y, shamt)
5135 // where mask = (2**shamt - 1)
5136 // =>
5137 // R = BSTRINS X, Y, ValBits - 1, shamt
5138 // where ValBits = 32 or 64
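 // Illustrative example (annotation, assuming ValBits = 32, shamt = 8):
 //   R = (X & 0xFF) | (Y << 8)
 //   => R = BSTRINS X, Y, 31, 8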
5139 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5140 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5141 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5142 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5143 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5144 (MaskIdx0 + MaskLen0 <= ValBits)) {
5145 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5146 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5147 N1.getOperand(0),
5148 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5149 DAG.getConstant(Shamt, DL, GRLenVT));
5150 }
5151
5152 // 5th pattern to match BSTRINS:
5153 // R = or (and X, mask), const
5154 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5155 // =>
5156 // R = BSTRINS X, (const >> lsb), msb, lsb
5157 // where msb = lsb + size - 1
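 // Illustrative example (annotation, assuming ValBits = 32, lsb = 8, size = 8):
 //   R = (X & 0xFFFF00FF) | 0x00002A00
 //   => R = BSTRINS X, 0x2A, 15, 8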
5158 if (N0.getOpcode() == ISD::AND &&
5159 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5160 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5161 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5162 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5163 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5164 return DAG.getNode(
5165 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5166 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5167 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5168 : (MaskIdx0 + MaskLen0 - 1),
5169 DL, GRLenVT),
5170 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5171 }
5172
5173 // 6th pattern.
5174 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5175 // by the incoming bits are known to be zero.
5176 // =>
5177 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5178 //
5179 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5180 // pattern is more common than the 1st. So we put the 1st before the 6th in
5181 // order to match as many nodes as possible.
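 // Illustrative example (annotation, assuming shamt = 8, MaskLen = 8 and bits
 // 15..8 of b known to be zero):
 //   a = b | ((c & 0xFF) << 8)  =>  a = BSTRINS b, c, 15, 8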
5182 ConstantSDNode *CNMask, *CNShamt;
5183 unsigned MaskIdx, MaskLen;
5184 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5185 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5186 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5187 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5188 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5189 Shamt = CNShamt->getZExtValue();
5190 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5191 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5192 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5193 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5194 N1.getOperand(0).getOperand(0),
5195 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5196 DAG.getConstant(Shamt, DL, GRLenVT));
5197 }
5198 }
5199
5200 // 7th pattern.
5201 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5202 // overwritten by the incoming bits are known to be zero.
5203 // =>
5204 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5205 //
5206 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5207 // before the 7th in order to match as many nodes as possible.
5208 if (N1.getOpcode() == ISD::AND &&
5209 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5210 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5211 N1.getOperand(0).getOpcode() == ISD::SHL &&
5212 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5213 CNShamt->getZExtValue() == MaskIdx) {
5214 APInt ShMask(ValBits, CNMask->getZExtValue());
5215 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5216 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5217 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5218 N1.getOperand(0).getOperand(0),
5219 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5220 DAG.getConstant(MaskIdx, DL, GRLenVT));
5221 }
5222 }
5223
5224 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5225 if (!SwapAndRetried) {
5226 std::swap(N0, N1);
5227 SwapAndRetried = true;
5228 goto Retry;
5229 }
5230
5231 SwapAndRetried = false;
5232Retry2:
5233 // 8th pattern.
5234 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5235 // the incoming bits are known to be zero.
5236 // =>
5237 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5238 //
5239 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5240 // we put it here in order to match as many nodes as possible or generate fewer
5241 // instructions.
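 // Illustrative example (annotation, assuming MaskIdx = 8, MaskLen = 8 and bits
 // 15..8 of b known to be zero):
 //   a = b | (c & 0xFF00)  =>  a = BSTRINS b, (c >> 8), 15, 8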
5242 if (N1.getOpcode() == ISD::AND &&
5243 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5244 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5245 APInt ShMask(ValBits, CNMask->getZExtValue());
5246 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5247 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5248 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5249 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5250 N1->getOperand(0),
5251 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5252 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5253 DAG.getConstant(MaskIdx, DL, GRLenVT));
5254 }
5255 }
5256 // Swap N0/N1 and retry.
5257 if (!SwapAndRetried) {
5258 std::swap(N0, N1);
5259 SwapAndRetried = true;
5260 goto Retry2;
5261 }
5262
5263 return SDValue();
5264}
5265
5266static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5267 ExtType = ISD::NON_EXTLOAD;
5268
5269 switch (V.getNode()->getOpcode()) {
5270 case ISD::LOAD: {
5271 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5272 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5273 (LoadNode->getMemoryVT() == MVT::i16)) {
5274 ExtType = LoadNode->getExtensionType();
5275 return true;
5276 }
5277 return false;
5278 }
5279 case ISD::AssertSext: {
5280 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5281 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5282 ExtType = ISD::SEXTLOAD;
5283 return true;
5284 }
5285 return false;
5286 }
5287 case ISD::AssertZext: {
5288 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5289 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5290 ExtType = ISD::ZEXTLOAD;
5291 return true;
5292 }
5293 return false;
5294 }
5295 default:
5296 return false;
5297 }
5298
5299 return false;
5300}
5301
5302// Eliminate redundant truncation and zero-extension nodes.
5303// * Case 1:
5304// +------------+ +------------+ +------------+
5305// | Input1 | | Input2 | | CC |
5306// +------------+ +------------+ +------------+
5307// | | |
5308// V V +----+
5309// +------------+ +------------+ |
5310// | TRUNCATE | | TRUNCATE | |
5311// +------------+ +------------+ |
5312// | | |
5313// V V |
5314// +------------+ +------------+ |
5315// | ZERO_EXT | | ZERO_EXT | |
5316// +------------+ +------------+ |
5317// | | |
5318// | +-------------+ |
5319// V V | |
5320// +----------------+ | |
5321// | AND | | |
5322// +----------------+ | |
5323// | | |
5324// +---------------+ | |
5325// | | |
5326// V V V
5327// +-------------+
5328// | CMP |
5329// +-------------+
5330// * Case 2:
5331// +------------+ +------------+ +-------------+ +------------+ +------------+
5332// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5333// +------------+ +------------+ +-------------+ +------------+ +------------+
5334// | | | | |
5335// V | | | |
5336// +------------+ | | | |
5337// | XOR |<---------------------+ | |
5338// +------------+ | | |
5339// | | | |
5340// V V +---------------+ |
5341// +------------+ +------------+ | |
5342// | TRUNCATE | | TRUNCATE | | +-------------------------+
5343// +------------+ +------------+ | |
5344// | | | |
5345// V V | |
5346// +------------+ +------------+ | |
5347// | ZERO_EXT | | ZERO_EXT | | |
5348// +------------+ +------------+ | |
5349// | | | |
5350// V V | |
5351// +----------------+ | |
5352// | AND | | |
5353// +----------------+ | |
5354// | | |
5355// +---------------+ | |
5356// | | |
5357// V V V
5358// +-------------+
5359// | CMP |
5360// +-------------+
5361static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5362                                   TargetLowering::DAGCombinerInfo &DCI,
5363                                   const LoongArchSubtarget &Subtarget) {
5364 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5365
5366 SDNode *AndNode = N->getOperand(0).getNode();
5367 if (AndNode->getOpcode() != ISD::AND)
5368 return SDValue();
5369
5370 SDValue AndInputValue2 = AndNode->getOperand(1);
5371 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5372 return SDValue();
5373
5374 SDValue CmpInputValue = N->getOperand(1);
5375 SDValue AndInputValue1 = AndNode->getOperand(0);
5376 if (AndInputValue1.getOpcode() == ISD::XOR) {
5377 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5378 return SDValue();
5379 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5380 if (!CN || CN->getSExtValue() != -1)
5381 return SDValue();
5382 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5383 if (!CN || CN->getSExtValue() != 0)
5384 return SDValue();
5385 AndInputValue1 = AndInputValue1.getOperand(0);
5386 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5387 return SDValue();
5388 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5389 if (AndInputValue2 != CmpInputValue)
5390 return SDValue();
5391 } else {
5392 return SDValue();
5393 }
5394
5395 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5396 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5397 return SDValue();
5398
5399 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5400 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5401 return SDValue();
5402
5403 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5404 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5405 ISD::LoadExtType ExtType1;
5406 ISD::LoadExtType ExtType2;
5407
5408 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5409 !checkValueWidth(TruncInputValue2, ExtType2))
5410 return SDValue();
5411
5412 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5413 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5414 return SDValue();
5415
5416 if ((ExtType2 != ISD::ZEXTLOAD) &&
5417 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5418 return SDValue();
5419
5420 // These truncation and zero-extension nodes are not necessary, remove them.
5421 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5422 TruncInputValue1, TruncInputValue2);
5423 SDValue NewSetCC =
5424 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5425 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5426 return SDValue(N, 0);
5427}
5428
5429// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5430static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5431                                      TargetLowering::DAGCombinerInfo &DCI,
5432                                      const LoongArchSubtarget &Subtarget) {
5433 if (DCI.isBeforeLegalizeOps())
5434 return SDValue();
5435
5436 SDValue Src = N->getOperand(0);
5437 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5438 return SDValue();
5439
5440 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5441 Src.getOperand(0));
5442}
5443
5444// Perform common combines for BR_CC and SELECT_CC conditions.
5445static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5446 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5447 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5448
5449 // Since an arithmetic right shift always preserves the sign bit,
5450 // the shift can be omitted here.
5451 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5452 // setge (sra X, N), 0 -> setge X, 0
5453 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5454 LHS.getOpcode() == ISD::SRA) {
5455 LHS = LHS.getOperand(0);
5456 return true;
5457 }
5458
5459 if (!ISD::isIntEqualitySetCC(CCVal))
5460 return false;
5461
5462 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5463 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5464 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5465 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5466 // If we're looking for eq 0 instead of ne 0, we need to invert the
5467 // condition.
5468 bool Invert = CCVal == ISD::SETEQ;
5469 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5470 if (Invert)
5471 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5472
5473 RHS = LHS.getOperand(1);
5474 LHS = LHS.getOperand(0);
5475 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5476
5477 CC = DAG.getCondCode(CCVal);
5478 return true;
5479 }
5480
5481 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
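 // Annotation: e.g. with C = 3 on LA64 (GRLen = 64),
 //   ((X & 0x8) >> 3) != 0  becomes  (X << 60) < 0,
 // moving the tested bit into the sign position.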
5482 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5483 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5484 SDValue LHS0 = LHS.getOperand(0);
5485 if (LHS0.getOpcode() == ISD::AND &&
5486 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5487 uint64_t Mask = LHS0.getConstantOperandVal(1);
5488 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5489 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5490 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5491 CC = DAG.getCondCode(CCVal);
5492
5493 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5494 LHS = LHS0.getOperand(0);
5495 if (ShAmt != 0)
5496 LHS =
5497 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5498 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5499 return true;
5500 }
5501 }
5502 }
5503
5504 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5505 // This can occur when legalizing some floating point comparisons.
5506 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5507 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5508 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5509 CC = DAG.getCondCode(CCVal);
5510 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5511 return true;
5512 }
5513
5514 return false;
5515}
5516
5517static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5518                                   TargetLowering::DAGCombinerInfo &DCI,
5519                                   const LoongArchSubtarget &Subtarget) {
5520 SDValue LHS = N->getOperand(1);
5521 SDValue RHS = N->getOperand(2);
5522 SDValue CC = N->getOperand(3);
5523 SDLoc DL(N);
5524
5525 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5526 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5527 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5528
5529 return SDValue();
5530}
5531
5532static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5533                                       TargetLowering::DAGCombinerInfo &DCI,
5534                                       const LoongArchSubtarget &Subtarget) {
5535 // Transform
5536 SDValue LHS = N->getOperand(0);
5537 SDValue RHS = N->getOperand(1);
5538 SDValue CC = N->getOperand(2);
5539 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5540 SDValue TrueV = N->getOperand(3);
5541 SDValue FalseV = N->getOperand(4);
5542 SDLoc DL(N);
5543 EVT VT = N->getValueType(0);
5544
5545 // If the True and False values are the same, we don't need a select_cc.
5546 if (TrueV == FalseV)
5547 return TrueV;
5548
5549 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5550 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
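 // Annotation: e.g. (select (x < 0), 5, 3) on LA64 becomes
 //   m = x >> 63            // all-ones if x < 0, otherwise zero
 //   r = (m & (5 - 3)) + 3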
5551 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5552      isNullConstant(RHS) &&
5553 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5554 if (CCVal == ISD::CondCode::SETGE)
5555 std::swap(TrueV, FalseV);
5556
5557 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5558 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5559 // Only handle simm12; if the value is outside this range, it would be
5560 // materialized in a register anyway.
5561 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5562 isInt<12>(TrueSImm - FalseSImm)) {
5563 SDValue SRA =
5564 DAG.getNode(ISD::SRA, DL, VT, LHS,
5565 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5566 SDValue AND =
5567 DAG.getNode(ISD::AND, DL, VT, SRA,
5568 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5569 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5570 }
5571
5572 if (CCVal == ISD::CondCode::SETGE)
5573 std::swap(TrueV, FalseV);
5574 }
5575
5576 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5577 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5578 {LHS, RHS, CC, TrueV, FalseV});
5579
5580 return SDValue();
5581}
5582
5583template <unsigned N>
5584static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5585                                       SelectionDAG &DAG,
5586 const LoongArchSubtarget &Subtarget,
5587 bool IsSigned = false) {
5588 SDLoc DL(Node);
5589 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5590 // Check the ImmArg.
5591 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5592 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5593 DAG.getContext()->emitError(Node->getOperationName(0) +
5594 ": argument out of range.");
5595 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5596 }
5597 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5598}
5599
5600template <unsigned N>
5601static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5602 SelectionDAG &DAG, bool IsSigned = false) {
5603 SDLoc DL(Node);
5604 EVT ResTy = Node->getValueType(0);
5605 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5606
5607 // Check the ImmArg.
5608 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5609 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5610 DAG.getContext()->emitError(Node->getOperationName(0) +
5611 ": argument out of range.");
5612 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5613 }
5614 return DAG.getConstant(
5615      APInt(ResTy.getScalarType().getSizeInBits(),
5616 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5617 DL, ResTy);
5618}
5619
5620static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5621 SDLoc DL(Node);
5622 EVT ResTy = Node->getValueType(0);
5623 SDValue Vec = Node->getOperand(2);
5624 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5625 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5626}
5627
5628static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5629 SDLoc DL(Node);
5630 EVT ResTy = Node->getValueType(0);
5631 SDValue One = DAG.getConstant(1, DL, ResTy);
5632 SDValue Bit =
5633 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5634
5635 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5636 DAG.getNOT(DL, Bit, ResTy));
5637}
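// Annotation: per element this computes Vec1[i] & ~(1 << (Vec2[i] & (EltBits - 1))),
// i.e. the semantics of vbitclr.{b,h,w,d} / xvbitclr.{b,h,w,d}.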
5638
5639template <unsigned N>
5640static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5641 SDLoc DL(Node);
5642 EVT ResTy = Node->getValueType(0);
5643 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5644 // Check the unsigned ImmArg.
5645 if (!isUInt<N>(CImm->getZExtValue())) {
5646 DAG.getContext()->emitError(Node->getOperationName(0) +
5647 ": argument out of range.");
5648 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5649 }
5650
5651 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5652 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5653
5654 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5655}
5656
5657template <unsigned N>
5658static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5659 SDLoc DL(Node);
5660 EVT ResTy = Node->getValueType(0);
5661 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5662 // Check the unsigned ImmArg.
5663 if (!isUInt<N>(CImm->getZExtValue())) {
5664 DAG.getContext()->emitError(Node->getOperationName(0) +
5665 ": argument out of range.");
5666 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5667 }
5668
5669 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5670 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5671 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5672}
5673
5674template <unsigned N>
5675static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5676 SDLoc DL(Node);
5677 EVT ResTy = Node->getValueType(0);
5678 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5679 // Check the unsigned ImmArg.
5680 if (!isUInt<N>(CImm->getZExtValue())) {
5681 DAG.getContext()->emitError(Node->getOperationName(0) +
5682 ": argument out of range.");
5683 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5684 }
5685
5686 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5687 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5688 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5689}
5690
5691static SDValue
5692performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5693                                 TargetLowering::DAGCombinerInfo &DCI,
5694                                 const LoongArchSubtarget &Subtarget) {
5695 SDLoc DL(N);
5696 switch (N->getConstantOperandVal(0)) {
5697 default:
5698 break;
5699 case Intrinsic::loongarch_lsx_vadd_b:
5700 case Intrinsic::loongarch_lsx_vadd_h:
5701 case Intrinsic::loongarch_lsx_vadd_w:
5702 case Intrinsic::loongarch_lsx_vadd_d:
5703 case Intrinsic::loongarch_lasx_xvadd_b:
5704 case Intrinsic::loongarch_lasx_xvadd_h:
5705 case Intrinsic::loongarch_lasx_xvadd_w:
5706 case Intrinsic::loongarch_lasx_xvadd_d:
5707 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5708 N->getOperand(2));
5709 case Intrinsic::loongarch_lsx_vaddi_bu:
5710 case Intrinsic::loongarch_lsx_vaddi_hu:
5711 case Intrinsic::loongarch_lsx_vaddi_wu:
5712 case Intrinsic::loongarch_lsx_vaddi_du:
5713 case Intrinsic::loongarch_lasx_xvaddi_bu:
5714 case Intrinsic::loongarch_lasx_xvaddi_hu:
5715 case Intrinsic::loongarch_lasx_xvaddi_wu:
5716 case Intrinsic::loongarch_lasx_xvaddi_du:
5717 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5718 lowerVectorSplatImm<5>(N, 2, DAG));
5719 case Intrinsic::loongarch_lsx_vsub_b:
5720 case Intrinsic::loongarch_lsx_vsub_h:
5721 case Intrinsic::loongarch_lsx_vsub_w:
5722 case Intrinsic::loongarch_lsx_vsub_d:
5723 case Intrinsic::loongarch_lasx_xvsub_b:
5724 case Intrinsic::loongarch_lasx_xvsub_h:
5725 case Intrinsic::loongarch_lasx_xvsub_w:
5726 case Intrinsic::loongarch_lasx_xvsub_d:
5727 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5728 N->getOperand(2));
5729 case Intrinsic::loongarch_lsx_vsubi_bu:
5730 case Intrinsic::loongarch_lsx_vsubi_hu:
5731 case Intrinsic::loongarch_lsx_vsubi_wu:
5732 case Intrinsic::loongarch_lsx_vsubi_du:
5733 case Intrinsic::loongarch_lasx_xvsubi_bu:
5734 case Intrinsic::loongarch_lasx_xvsubi_hu:
5735 case Intrinsic::loongarch_lasx_xvsubi_wu:
5736 case Intrinsic::loongarch_lasx_xvsubi_du:
5737 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5738 lowerVectorSplatImm<5>(N, 2, DAG));
5739 case Intrinsic::loongarch_lsx_vneg_b:
5740 case Intrinsic::loongarch_lsx_vneg_h:
5741 case Intrinsic::loongarch_lsx_vneg_w:
5742 case Intrinsic::loongarch_lsx_vneg_d:
5743 case Intrinsic::loongarch_lasx_xvneg_b:
5744 case Intrinsic::loongarch_lasx_xvneg_h:
5745 case Intrinsic::loongarch_lasx_xvneg_w:
5746 case Intrinsic::loongarch_lasx_xvneg_d:
5747 return DAG.getNode(
5748 ISD::SUB, DL, N->getValueType(0),
5749 DAG.getConstant(
5750 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5751 /*isSigned=*/true),
5752 SDLoc(N), N->getValueType(0)),
5753 N->getOperand(1));
5754 case Intrinsic::loongarch_lsx_vmax_b:
5755 case Intrinsic::loongarch_lsx_vmax_h:
5756 case Intrinsic::loongarch_lsx_vmax_w:
5757 case Intrinsic::loongarch_lsx_vmax_d:
5758 case Intrinsic::loongarch_lasx_xvmax_b:
5759 case Intrinsic::loongarch_lasx_xvmax_h:
5760 case Intrinsic::loongarch_lasx_xvmax_w:
5761 case Intrinsic::loongarch_lasx_xvmax_d:
5762 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5763 N->getOperand(2));
5764 case Intrinsic::loongarch_lsx_vmax_bu:
5765 case Intrinsic::loongarch_lsx_vmax_hu:
5766 case Intrinsic::loongarch_lsx_vmax_wu:
5767 case Intrinsic::loongarch_lsx_vmax_du:
5768 case Intrinsic::loongarch_lasx_xvmax_bu:
5769 case Intrinsic::loongarch_lasx_xvmax_hu:
5770 case Intrinsic::loongarch_lasx_xvmax_wu:
5771 case Intrinsic::loongarch_lasx_xvmax_du:
5772 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5773 N->getOperand(2));
5774 case Intrinsic::loongarch_lsx_vmaxi_b:
5775 case Intrinsic::loongarch_lsx_vmaxi_h:
5776 case Intrinsic::loongarch_lsx_vmaxi_w:
5777 case Intrinsic::loongarch_lsx_vmaxi_d:
5778 case Intrinsic::loongarch_lasx_xvmaxi_b:
5779 case Intrinsic::loongarch_lasx_xvmaxi_h:
5780 case Intrinsic::loongarch_lasx_xvmaxi_w:
5781 case Intrinsic::loongarch_lasx_xvmaxi_d:
5782 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5783 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5784 case Intrinsic::loongarch_lsx_vmaxi_bu:
5785 case Intrinsic::loongarch_lsx_vmaxi_hu:
5786 case Intrinsic::loongarch_lsx_vmaxi_wu:
5787 case Intrinsic::loongarch_lsx_vmaxi_du:
5788 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5789 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5790 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5791 case Intrinsic::loongarch_lasx_xvmaxi_du:
5792 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5793 lowerVectorSplatImm<5>(N, 2, DAG));
5794 case Intrinsic::loongarch_lsx_vmin_b:
5795 case Intrinsic::loongarch_lsx_vmin_h:
5796 case Intrinsic::loongarch_lsx_vmin_w:
5797 case Intrinsic::loongarch_lsx_vmin_d:
5798 case Intrinsic::loongarch_lasx_xvmin_b:
5799 case Intrinsic::loongarch_lasx_xvmin_h:
5800 case Intrinsic::loongarch_lasx_xvmin_w:
5801 case Intrinsic::loongarch_lasx_xvmin_d:
5802 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5803 N->getOperand(2));
5804 case Intrinsic::loongarch_lsx_vmin_bu:
5805 case Intrinsic::loongarch_lsx_vmin_hu:
5806 case Intrinsic::loongarch_lsx_vmin_wu:
5807 case Intrinsic::loongarch_lsx_vmin_du:
5808 case Intrinsic::loongarch_lasx_xvmin_bu:
5809 case Intrinsic::loongarch_lasx_xvmin_hu:
5810 case Intrinsic::loongarch_lasx_xvmin_wu:
5811 case Intrinsic::loongarch_lasx_xvmin_du:
5812 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5813 N->getOperand(2));
5814 case Intrinsic::loongarch_lsx_vmini_b:
5815 case Intrinsic::loongarch_lsx_vmini_h:
5816 case Intrinsic::loongarch_lsx_vmini_w:
5817 case Intrinsic::loongarch_lsx_vmini_d:
5818 case Intrinsic::loongarch_lasx_xvmini_b:
5819 case Intrinsic::loongarch_lasx_xvmini_h:
5820 case Intrinsic::loongarch_lasx_xvmini_w:
5821 case Intrinsic::loongarch_lasx_xvmini_d:
5822 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5823 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5824 case Intrinsic::loongarch_lsx_vmini_bu:
5825 case Intrinsic::loongarch_lsx_vmini_hu:
5826 case Intrinsic::loongarch_lsx_vmini_wu:
5827 case Intrinsic::loongarch_lsx_vmini_du:
5828 case Intrinsic::loongarch_lasx_xvmini_bu:
5829 case Intrinsic::loongarch_lasx_xvmini_hu:
5830 case Intrinsic::loongarch_lasx_xvmini_wu:
5831 case Intrinsic::loongarch_lasx_xvmini_du:
5832 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5833 lowerVectorSplatImm<5>(N, 2, DAG));
5834 case Intrinsic::loongarch_lsx_vmul_b:
5835 case Intrinsic::loongarch_lsx_vmul_h:
5836 case Intrinsic::loongarch_lsx_vmul_w:
5837 case Intrinsic::loongarch_lsx_vmul_d:
5838 case Intrinsic::loongarch_lasx_xvmul_b:
5839 case Intrinsic::loongarch_lasx_xvmul_h:
5840 case Intrinsic::loongarch_lasx_xvmul_w:
5841 case Intrinsic::loongarch_lasx_xvmul_d:
5842 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5843 N->getOperand(2));
5844 case Intrinsic::loongarch_lsx_vmadd_b:
5845 case Intrinsic::loongarch_lsx_vmadd_h:
5846 case Intrinsic::loongarch_lsx_vmadd_w:
5847 case Intrinsic::loongarch_lsx_vmadd_d:
5848 case Intrinsic::loongarch_lasx_xvmadd_b:
5849 case Intrinsic::loongarch_lasx_xvmadd_h:
5850 case Intrinsic::loongarch_lasx_xvmadd_w:
5851 case Intrinsic::loongarch_lasx_xvmadd_d: {
5852 EVT ResTy = N->getValueType(0);
5853 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5854 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5855 N->getOperand(3)));
5856 }
5857 case Intrinsic::loongarch_lsx_vmsub_b:
5858 case Intrinsic::loongarch_lsx_vmsub_h:
5859 case Intrinsic::loongarch_lsx_vmsub_w:
5860 case Intrinsic::loongarch_lsx_vmsub_d:
5861 case Intrinsic::loongarch_lasx_xvmsub_b:
5862 case Intrinsic::loongarch_lasx_xvmsub_h:
5863 case Intrinsic::loongarch_lasx_xvmsub_w:
5864 case Intrinsic::loongarch_lasx_xvmsub_d: {
5865 EVT ResTy = N->getValueType(0);
5866 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5867 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5868 N->getOperand(3)));
5869 }
5870 case Intrinsic::loongarch_lsx_vdiv_b:
5871 case Intrinsic::loongarch_lsx_vdiv_h:
5872 case Intrinsic::loongarch_lsx_vdiv_w:
5873 case Intrinsic::loongarch_lsx_vdiv_d:
5874 case Intrinsic::loongarch_lasx_xvdiv_b:
5875 case Intrinsic::loongarch_lasx_xvdiv_h:
5876 case Intrinsic::loongarch_lasx_xvdiv_w:
5877 case Intrinsic::loongarch_lasx_xvdiv_d:
5878 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5879 N->getOperand(2));
5880 case Intrinsic::loongarch_lsx_vdiv_bu:
5881 case Intrinsic::loongarch_lsx_vdiv_hu:
5882 case Intrinsic::loongarch_lsx_vdiv_wu:
5883 case Intrinsic::loongarch_lsx_vdiv_du:
5884 case Intrinsic::loongarch_lasx_xvdiv_bu:
5885 case Intrinsic::loongarch_lasx_xvdiv_hu:
5886 case Intrinsic::loongarch_lasx_xvdiv_wu:
5887 case Intrinsic::loongarch_lasx_xvdiv_du:
5888 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5889 N->getOperand(2));
5890 case Intrinsic::loongarch_lsx_vmod_b:
5891 case Intrinsic::loongarch_lsx_vmod_h:
5892 case Intrinsic::loongarch_lsx_vmod_w:
5893 case Intrinsic::loongarch_lsx_vmod_d:
5894 case Intrinsic::loongarch_lasx_xvmod_b:
5895 case Intrinsic::loongarch_lasx_xvmod_h:
5896 case Intrinsic::loongarch_lasx_xvmod_w:
5897 case Intrinsic::loongarch_lasx_xvmod_d:
5898 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
5899 N->getOperand(2));
5900 case Intrinsic::loongarch_lsx_vmod_bu:
5901 case Intrinsic::loongarch_lsx_vmod_hu:
5902 case Intrinsic::loongarch_lsx_vmod_wu:
5903 case Intrinsic::loongarch_lsx_vmod_du:
5904 case Intrinsic::loongarch_lasx_xvmod_bu:
5905 case Intrinsic::loongarch_lasx_xvmod_hu:
5906 case Intrinsic::loongarch_lasx_xvmod_wu:
5907 case Intrinsic::loongarch_lasx_xvmod_du:
5908 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
5909 N->getOperand(2));
5910 case Intrinsic::loongarch_lsx_vand_v:
5911 case Intrinsic::loongarch_lasx_xvand_v:
5912 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5913 N->getOperand(2));
5914 case Intrinsic::loongarch_lsx_vor_v:
5915 case Intrinsic::loongarch_lasx_xvor_v:
5916 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5917 N->getOperand(2));
5918 case Intrinsic::loongarch_lsx_vxor_v:
5919 case Intrinsic::loongarch_lasx_xvxor_v:
5920 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5921 N->getOperand(2));
5922 case Intrinsic::loongarch_lsx_vnor_v:
5923 case Intrinsic::loongarch_lasx_xvnor_v: {
5924 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5925 N->getOperand(2));
5926 return DAG.getNOT(DL, Res, Res->getValueType(0));
5927 }
5928 case Intrinsic::loongarch_lsx_vandi_b:
5929 case Intrinsic::loongarch_lasx_xvandi_b:
5930 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5931 lowerVectorSplatImm<8>(N, 2, DAG));
5932 case Intrinsic::loongarch_lsx_vori_b:
5933 case Intrinsic::loongarch_lasx_xvori_b:
5934 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5935 lowerVectorSplatImm<8>(N, 2, DAG));
5936 case Intrinsic::loongarch_lsx_vxori_b:
5937 case Intrinsic::loongarch_lasx_xvxori_b:
5938 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5939 lowerVectorSplatImm<8>(N, 2, DAG));
5940 case Intrinsic::loongarch_lsx_vsll_b:
5941 case Intrinsic::loongarch_lsx_vsll_h:
5942 case Intrinsic::loongarch_lsx_vsll_w:
5943 case Intrinsic::loongarch_lsx_vsll_d:
5944 case Intrinsic::loongarch_lasx_xvsll_b:
5945 case Intrinsic::loongarch_lasx_xvsll_h:
5946 case Intrinsic::loongarch_lasx_xvsll_w:
5947 case Intrinsic::loongarch_lasx_xvsll_d:
5948 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5949 truncateVecElts(N, DAG));
5950 case Intrinsic::loongarch_lsx_vslli_b:
5951 case Intrinsic::loongarch_lasx_xvslli_b:
5952 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5953 lowerVectorSplatImm<3>(N, 2, DAG));
5954 case Intrinsic::loongarch_lsx_vslli_h:
5955 case Intrinsic::loongarch_lasx_xvslli_h:
5956 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5957 lowerVectorSplatImm<4>(N, 2, DAG));
5958 case Intrinsic::loongarch_lsx_vslli_w:
5959 case Intrinsic::loongarch_lasx_xvslli_w:
5960 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5961 lowerVectorSplatImm<5>(N, 2, DAG));
5962 case Intrinsic::loongarch_lsx_vslli_d:
5963 case Intrinsic::loongarch_lasx_xvslli_d:
5964 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5965 lowerVectorSplatImm<6>(N, 2, DAG));
5966 case Intrinsic::loongarch_lsx_vsrl_b:
5967 case Intrinsic::loongarch_lsx_vsrl_h:
5968 case Intrinsic::loongarch_lsx_vsrl_w:
5969 case Intrinsic::loongarch_lsx_vsrl_d:
5970 case Intrinsic::loongarch_lasx_xvsrl_b:
5971 case Intrinsic::loongarch_lasx_xvsrl_h:
5972 case Intrinsic::loongarch_lasx_xvsrl_w:
5973 case Intrinsic::loongarch_lasx_xvsrl_d:
5974 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5975 truncateVecElts(N, DAG));
5976 case Intrinsic::loongarch_lsx_vsrli_b:
5977 case Intrinsic::loongarch_lasx_xvsrli_b:
5978 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5979 lowerVectorSplatImm<3>(N, 2, DAG));
5980 case Intrinsic::loongarch_lsx_vsrli_h:
5981 case Intrinsic::loongarch_lasx_xvsrli_h:
5982 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5983 lowerVectorSplatImm<4>(N, 2, DAG));
5984 case Intrinsic::loongarch_lsx_vsrli_w:
5985 case Intrinsic::loongarch_lasx_xvsrli_w:
5986 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5987 lowerVectorSplatImm<5>(N, 2, DAG));
5988 case Intrinsic::loongarch_lsx_vsrli_d:
5989 case Intrinsic::loongarch_lasx_xvsrli_d:
5990 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5991 lowerVectorSplatImm<6>(N, 2, DAG));
5992 case Intrinsic::loongarch_lsx_vsra_b:
5993 case Intrinsic::loongarch_lsx_vsra_h:
5994 case Intrinsic::loongarch_lsx_vsra_w:
5995 case Intrinsic::loongarch_lsx_vsra_d:
5996 case Intrinsic::loongarch_lasx_xvsra_b:
5997 case Intrinsic::loongarch_lasx_xvsra_h:
5998 case Intrinsic::loongarch_lasx_xvsra_w:
5999 case Intrinsic::loongarch_lasx_xvsra_d:
6000 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6001 truncateVecElts(N, DAG));
6002 case Intrinsic::loongarch_lsx_vsrai_b:
6003 case Intrinsic::loongarch_lasx_xvsrai_b:
6004 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6005 lowerVectorSplatImm<3>(N, 2, DAG));
6006 case Intrinsic::loongarch_lsx_vsrai_h:
6007 case Intrinsic::loongarch_lasx_xvsrai_h:
6008 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6009 lowerVectorSplatImm<4>(N, 2, DAG));
6010 case Intrinsic::loongarch_lsx_vsrai_w:
6011 case Intrinsic::loongarch_lasx_xvsrai_w:
6012 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6013 lowerVectorSplatImm<5>(N, 2, DAG));
6014 case Intrinsic::loongarch_lsx_vsrai_d:
6015 case Intrinsic::loongarch_lasx_xvsrai_d:
6016 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6017 lowerVectorSplatImm<6>(N, 2, DAG));
6018 case Intrinsic::loongarch_lsx_vclz_b:
6019 case Intrinsic::loongarch_lsx_vclz_h:
6020 case Intrinsic::loongarch_lsx_vclz_w:
6021 case Intrinsic::loongarch_lsx_vclz_d:
6022 case Intrinsic::loongarch_lasx_xvclz_b:
6023 case Intrinsic::loongarch_lasx_xvclz_h:
6024 case Intrinsic::loongarch_lasx_xvclz_w:
6025 case Intrinsic::loongarch_lasx_xvclz_d:
6026 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6027 case Intrinsic::loongarch_lsx_vpcnt_b:
6028 case Intrinsic::loongarch_lsx_vpcnt_h:
6029 case Intrinsic::loongarch_lsx_vpcnt_w:
6030 case Intrinsic::loongarch_lsx_vpcnt_d:
6031 case Intrinsic::loongarch_lasx_xvpcnt_b:
6032 case Intrinsic::loongarch_lasx_xvpcnt_h:
6033 case Intrinsic::loongarch_lasx_xvpcnt_w:
6034 case Intrinsic::loongarch_lasx_xvpcnt_d:
6035 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6036 case Intrinsic::loongarch_lsx_vbitclr_b:
6037 case Intrinsic::loongarch_lsx_vbitclr_h:
6038 case Intrinsic::loongarch_lsx_vbitclr_w:
6039 case Intrinsic::loongarch_lsx_vbitclr_d:
6040 case Intrinsic::loongarch_lasx_xvbitclr_b:
6041 case Intrinsic::loongarch_lasx_xvbitclr_h:
6042 case Intrinsic::loongarch_lasx_xvbitclr_w:
6043 case Intrinsic::loongarch_lasx_xvbitclr_d:
6044 return lowerVectorBitClear(N, DAG);
6045 case Intrinsic::loongarch_lsx_vbitclri_b:
6046 case Intrinsic::loongarch_lasx_xvbitclri_b:
6047 return lowerVectorBitClearImm<3>(N, DAG);
6048 case Intrinsic::loongarch_lsx_vbitclri_h:
6049 case Intrinsic::loongarch_lasx_xvbitclri_h:
6050 return lowerVectorBitClearImm<4>(N, DAG);
6051 case Intrinsic::loongarch_lsx_vbitclri_w:
6052 case Intrinsic::loongarch_lasx_xvbitclri_w:
6053 return lowerVectorBitClearImm<5>(N, DAG);
6054 case Intrinsic::loongarch_lsx_vbitclri_d:
6055 case Intrinsic::loongarch_lasx_xvbitclri_d:
6056 return lowerVectorBitClearImm<6>(N, DAG);
6057 case Intrinsic::loongarch_lsx_vbitset_b:
6058 case Intrinsic::loongarch_lsx_vbitset_h:
6059 case Intrinsic::loongarch_lsx_vbitset_w:
6060 case Intrinsic::loongarch_lsx_vbitset_d:
6061 case Intrinsic::loongarch_lasx_xvbitset_b:
6062 case Intrinsic::loongarch_lasx_xvbitset_h:
6063 case Intrinsic::loongarch_lasx_xvbitset_w:
6064 case Intrinsic::loongarch_lasx_xvbitset_d: {
6065 EVT VecTy = N->getValueType(0);
6066 SDValue One = DAG.getConstant(1, DL, VecTy);
6067 return DAG.getNode(
6068 ISD::OR, DL, VecTy, N->getOperand(1),
6069 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6070 }
6071 case Intrinsic::loongarch_lsx_vbitseti_b:
6072 case Intrinsic::loongarch_lasx_xvbitseti_b:
6073 return lowerVectorBitSetImm<3>(N, DAG);
6074 case Intrinsic::loongarch_lsx_vbitseti_h:
6075 case Intrinsic::loongarch_lasx_xvbitseti_h:
6076 return lowerVectorBitSetImm<4>(N, DAG);
6077 case Intrinsic::loongarch_lsx_vbitseti_w:
6078 case Intrinsic::loongarch_lasx_xvbitseti_w:
6079 return lowerVectorBitSetImm<5>(N, DAG);
6080 case Intrinsic::loongarch_lsx_vbitseti_d:
6081 case Intrinsic::loongarch_lasx_xvbitseti_d:
6082 return lowerVectorBitSetImm<6>(N, DAG);
6083 case Intrinsic::loongarch_lsx_vbitrev_b:
6084 case Intrinsic::loongarch_lsx_vbitrev_h:
6085 case Intrinsic::loongarch_lsx_vbitrev_w:
6086 case Intrinsic::loongarch_lsx_vbitrev_d:
6087 case Intrinsic::loongarch_lasx_xvbitrev_b:
6088 case Intrinsic::loongarch_lasx_xvbitrev_h:
6089 case Intrinsic::loongarch_lasx_xvbitrev_w:
6090 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6091 EVT VecTy = N->getValueType(0);
6092 SDValue One = DAG.getConstant(1, DL, VecTy);
6093 return DAG.getNode(
6094 ISD::XOR, DL, VecTy, N->getOperand(1),
6095 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6096 }
6097 case Intrinsic::loongarch_lsx_vbitrevi_b:
6098 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6099 return lowerVectorBitRevImm<3>(N, DAG);
6100 case Intrinsic::loongarch_lsx_vbitrevi_h:
6101 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6102 return lowerVectorBitRevImm<4>(N, DAG);
6103 case Intrinsic::loongarch_lsx_vbitrevi_w:
6104 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6105 return lowerVectorBitRevImm<5>(N, DAG);
6106 case Intrinsic::loongarch_lsx_vbitrevi_d:
6107 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6108 return lowerVectorBitRevImm<6>(N, DAG);
6109 case Intrinsic::loongarch_lsx_vfadd_s:
6110 case Intrinsic::loongarch_lsx_vfadd_d:
6111 case Intrinsic::loongarch_lasx_xvfadd_s:
6112 case Intrinsic::loongarch_lasx_xvfadd_d:
6113 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6114 N->getOperand(2));
6115 case Intrinsic::loongarch_lsx_vfsub_s:
6116 case Intrinsic::loongarch_lsx_vfsub_d:
6117 case Intrinsic::loongarch_lasx_xvfsub_s:
6118 case Intrinsic::loongarch_lasx_xvfsub_d:
6119 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6120 N->getOperand(2));
6121 case Intrinsic::loongarch_lsx_vfmul_s:
6122 case Intrinsic::loongarch_lsx_vfmul_d:
6123 case Intrinsic::loongarch_lasx_xvfmul_s:
6124 case Intrinsic::loongarch_lasx_xvfmul_d:
6125 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6126 N->getOperand(2));
6127 case Intrinsic::loongarch_lsx_vfdiv_s:
6128 case Intrinsic::loongarch_lsx_vfdiv_d:
6129 case Intrinsic::loongarch_lasx_xvfdiv_s:
6130 case Intrinsic::loongarch_lasx_xvfdiv_d:
6131 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6132 N->getOperand(2));
6133 case Intrinsic::loongarch_lsx_vfmadd_s:
6134 case Intrinsic::loongarch_lsx_vfmadd_d:
6135 case Intrinsic::loongarch_lasx_xvfmadd_s:
6136 case Intrinsic::loongarch_lasx_xvfmadd_d:
6137 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6138 N->getOperand(2), N->getOperand(3));
6139 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6140 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6141 N->getOperand(1), N->getOperand(2),
6142 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6143 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6144 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6145 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6146 N->getOperand(1), N->getOperand(2),
6147 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6148 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6149 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6150 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6151 N->getOperand(1), N->getOperand(2),
6152 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6153 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6154 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6155 N->getOperand(1), N->getOperand(2),
6156 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6157 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6158 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6159 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6160 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6161 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6162 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6163 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6164 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6165 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6166 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6167 N->getOperand(1)));
6168 case Intrinsic::loongarch_lsx_vreplve_b:
6169 case Intrinsic::loongarch_lsx_vreplve_h:
6170 case Intrinsic::loongarch_lsx_vreplve_w:
6171 case Intrinsic::loongarch_lsx_vreplve_d:
6172 case Intrinsic::loongarch_lasx_xvreplve_b:
6173 case Intrinsic::loongarch_lasx_xvreplve_h:
6174 case Intrinsic::loongarch_lasx_xvreplve_w:
6175 case Intrinsic::loongarch_lasx_xvreplve_d:
6176 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6177 N->getOperand(1),
6178 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6179 N->getOperand(2)));
6180 }
6181 return SDValue();
6182}
6183
6184static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6185                                        TargetLowering::DAGCombinerInfo &DCI,
6186                                        const LoongArchSubtarget &Subtarget) {
6187 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6188 // conversion is unnecessary and can be replaced with the
6189 // MOVFR2GR_S_LA64 operand.
6190 SDValue Op0 = N->getOperand(0);
6191  if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6192 return Op0.getOperand(0);
6193 return SDValue();
6194}
6195
6196static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6197                                        TargetLowering::DAGCombinerInfo &DCI,
6198                                        const LoongArchSubtarget &Subtarget) {
6199 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6200 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6201 // operand.
6202 SDValue Op0 = N->getOperand(0);
6203  if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6204 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6205 "Unexpected value type!");
6206 return Op0.getOperand(0);
6207 }
6208 return SDValue();
6209}
6210
6211static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6212                                     TargetLowering::DAGCombinerInfo &DCI,
6213                                     const LoongArchSubtarget &Subtarget) {
6214 MVT VT = N->getSimpleValueType(0);
6215 unsigned NumBits = VT.getScalarSizeInBits();
6216
6217 // Simplify the inputs.
6218 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6219 APInt DemandedMask(APInt::getAllOnes(NumBits));
6220 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6221 return SDValue(N, 0);
6222
6223 return SDValue();
6224}
6225
6226static SDValue
6227performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6228                             TargetLowering::DAGCombinerInfo &DCI,
6229                             const LoongArchSubtarget &Subtarget) {
6230 SDValue Op0 = N->getOperand(0);
6231 SDLoc DL(N);
6232
6233 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6234 // redundant. Instead, use BuildPairF64's operands directly.
6235  if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6236 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6237
6238 if (Op0->isUndef()) {
6239 SDValue Lo = DAG.getUNDEF(MVT::i32);
6240 SDValue Hi = DAG.getUNDEF(MVT::i32);
6241 return DCI.CombineTo(N, Lo, Hi);
6242 }
6243
6244 // It's cheaper to materialise two 32-bit integers than to load a double
6245 // from the constant pool and transfer it to integer registers through the
6246 // stack.
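 // Annotation: e.g. splitting the f64 constant 1.0 (bit pattern
 // 0x3FF0000000000000) yields Lo = 0x00000000 and Hi = 0x3FF00000.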
6247  if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6248 APInt V = C->getValueAPF().bitcastToAPInt();
6249 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6250 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6251 return DCI.CombineTo(N, Lo, Hi);
6252 }
6253
6254 return SDValue();
6255}
6256
6257static SDValue
6258performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
6259                                 TargetLowering::DAGCombinerInfo &DCI,
6260                                 const LoongArchSubtarget &Subtarget) {
6261 if (!DCI.isBeforeLegalize())
6262 return SDValue();
6263
6264 MVT EltVT = N->getSimpleValueType(0);
6265 SDValue Vec = N->getOperand(0);
6266 EVT VecTy = Vec->getValueType(0);
6267 SDValue Idx = N->getOperand(1);
6268 unsigned IdxOp = Idx.getOpcode();
6269 SDLoc DL(N);
6270
6271 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6272 return SDValue();
6273
6274 // Combine:
6275 // t2 = truncate t1
6276 // t3 = {zero/sign/any}_extend t2
6277 // t4 = extract_vector_elt t0, t3
6278 // to:
6279 // t4 = extract_vector_elt t0, t1
6280 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6281 IdxOp == ISD::ANY_EXTEND) {
6282 SDValue IdxOrig = Idx.getOperand(0);
6283 if (!(IdxOrig.getOpcode() == ISD::TRUNCATE))
6284 return SDValue();
6285
6286 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6287 IdxOrig.getOperand(0));
6288 }
6289
6290 return SDValue();
6291}
6292
6293SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6294 DAGCombinerInfo &DCI) const {
6295 SelectionDAG &DAG = DCI.DAG;
6296 switch (N->getOpcode()) {
6297 default:
6298 break;
6299 case ISD::AND:
6300 return performANDCombine(N, DAG, DCI, Subtarget);
6301 case ISD::OR:
6302 return performORCombine(N, DAG, DCI, Subtarget);
6303 case ISD::SETCC:
6304 return performSETCCCombine(N, DAG, DCI, Subtarget);
6305 case ISD::SRL:
6306 return performSRLCombine(N, DAG, DCI, Subtarget);
6307 case ISD::BITCAST:
6308 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6309  case LoongArchISD::BITREV_W:
6310 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6311  case LoongArchISD::BR_CC:
6312 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6313  case LoongArchISD::SELECT_CC:
6314 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6315  case ISD::INTRINSIC_WO_CHAIN:
6316 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6317  case LoongArchISD::MOVGR2FR_W_LA64:
6318 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6319  case LoongArchISD::MOVFR2GR_S_LA64:
6320 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6321  case LoongArchISD::VMSKLTZ:
6322  case LoongArchISD::XVMSKLTZ:
6323 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6324  case LoongArchISD::SPLIT_PAIR_F64:
6325 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6326  case ISD::EXTRACT_VECTOR_ELT:
6327 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6328 }
6329 return SDValue();
6330}
6331
6332static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
6333                                              MachineBasicBlock *MBB) {
6334 if (!ZeroDivCheck)
6335 return MBB;
6336
6337 // Build instructions:
6338 // MBB:
6339 // div(or mod) $dst, $dividend, $divisor
6340 // bne $divisor, $zero, SinkMBB
6341 // BreakMBB:
6342 // break 7 // BRK_DIVZERO
6343 // SinkMBB:
6344 // fallthrough
6345 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6346 MachineFunction::iterator It = ++MBB->getIterator();
6347 MachineFunction *MF = MBB->getParent();
6348 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6349 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6350 MF->insert(It, BreakMBB);
6351 MF->insert(It, SinkMBB);
6352
6353 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6354 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6355 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6356
6357 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6358 DebugLoc DL = MI.getDebugLoc();
6359 MachineOperand &Divisor = MI.getOperand(2);
6360 Register DivisorReg = Divisor.getReg();
6361
6362 // MBB:
6363 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6364 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6365 .addReg(LoongArch::R0)
6366 .addMBB(SinkMBB);
6367 MBB->addSuccessor(BreakMBB);
6368 MBB->addSuccessor(SinkMBB);
6369
6370 // BreakMBB:
6371 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6372 // definition of BRK_DIVZERO.
6373 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6374 BreakMBB->addSuccessor(SinkMBB);
6375
6376 // Clear Divisor's kill flag.
6377 Divisor.setIsKill(false);
6378
6379 return SinkMBB;
6380}
6381
6382static MachineBasicBlock *
6383emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6384                        const LoongArchSubtarget &Subtarget) {
6385 unsigned CondOpc;
6386 switch (MI.getOpcode()) {
6387 default:
6388 llvm_unreachable("Unexpected opcode");
6389 case LoongArch::PseudoVBZ:
6390 CondOpc = LoongArch::VSETEQZ_V;
6391 break;
6392 case LoongArch::PseudoVBZ_B:
6393 CondOpc = LoongArch::VSETANYEQZ_B;
6394 break;
6395 case LoongArch::PseudoVBZ_H:
6396 CondOpc = LoongArch::VSETANYEQZ_H;
6397 break;
6398 case LoongArch::PseudoVBZ_W:
6399 CondOpc = LoongArch::VSETANYEQZ_W;
6400 break;
6401 case LoongArch::PseudoVBZ_D:
6402 CondOpc = LoongArch::VSETANYEQZ_D;
6403 break;
6404 case LoongArch::PseudoVBNZ:
6405 CondOpc = LoongArch::VSETNEZ_V;
6406 break;
6407 case LoongArch::PseudoVBNZ_B:
6408 CondOpc = LoongArch::VSETALLNEZ_B;
6409 break;
6410 case LoongArch::PseudoVBNZ_H:
6411 CondOpc = LoongArch::VSETALLNEZ_H;
6412 break;
6413 case LoongArch::PseudoVBNZ_W:
6414 CondOpc = LoongArch::VSETALLNEZ_W;
6415 break;
6416 case LoongArch::PseudoVBNZ_D:
6417 CondOpc = LoongArch::VSETALLNEZ_D;
6418 break;
6419 case LoongArch::PseudoXVBZ:
6420 CondOpc = LoongArch::XVSETEQZ_V;
6421 break;
6422 case LoongArch::PseudoXVBZ_B:
6423 CondOpc = LoongArch::XVSETANYEQZ_B;
6424 break;
6425 case LoongArch::PseudoXVBZ_H:
6426 CondOpc = LoongArch::XVSETANYEQZ_H;
6427 break;
6428 case LoongArch::PseudoXVBZ_W:
6429 CondOpc = LoongArch::XVSETANYEQZ_W;
6430 break;
6431 case LoongArch::PseudoXVBZ_D:
6432 CondOpc = LoongArch::XVSETANYEQZ_D;
6433 break;
6434 case LoongArch::PseudoXVBNZ:
6435 CondOpc = LoongArch::XVSETNEZ_V;
6436 break;
6437 case LoongArch::PseudoXVBNZ_B:
6438 CondOpc = LoongArch::XVSETALLNEZ_B;
6439 break;
6440 case LoongArch::PseudoXVBNZ_H:
6441 CondOpc = LoongArch::XVSETALLNEZ_H;
6442 break;
6443 case LoongArch::PseudoXVBNZ_W:
6444 CondOpc = LoongArch::XVSETALLNEZ_W;
6445 break;
6446 case LoongArch::PseudoXVBNZ_D:
6447 CondOpc = LoongArch::XVSETALLNEZ_D;
6448 break;
6449 }
6450
6451 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6452 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6453 DebugLoc DL = MI.getDebugLoc();
6454  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6455  MachineFunction::iterator It = ++BB->getIterator();
6456
6457 MachineFunction *F = BB->getParent();
6458 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6459 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6460 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6461
6462 F->insert(It, FalseBB);
6463 F->insert(It, TrueBB);
6464 F->insert(It, SinkBB);
6465
6466 // Transfer the remainder of MBB and its successor edges to Sink.
6467 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6468  SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6469
6470 // Insert the real instruction to BB.
6471 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6472 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6473
6474 // Insert branch.
6475 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6476 BB->addSuccessor(FalseBB);
6477 BB->addSuccessor(TrueBB);
6478
6479 // FalseBB.
6480 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6481 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6482 .addReg(LoongArch::R0)
6483 .addImm(0);
6484 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6485 FalseBB->addSuccessor(SinkBB);
6486
6487 // TrueBB.
6488 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6489 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6490 .addReg(LoongArch::R0)
6491 .addImm(1);
6492 TrueBB->addSuccessor(SinkBB);
6493
6494 // SinkBB: merge the results.
6495 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6496 MI.getOperand(0).getReg())
6497 .addReg(RD1)
6498 .addMBB(FalseBB)
6499 .addReg(RD2)
6500 .addMBB(TrueBB);
6501
6502 // The pseudo instruction is gone now.
6503 MI.eraseFromParent();
6504 return SinkBB;
6505}
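// Illustrative sketch (editor's addition; register names are placeholders):
// a PseudoVBNZ_W on $vr0 is expected to expand into the diamond built above,
// materialising 0 or 1 in a GPR:
//
//   vsetallnez.w  $fcc0, $vr0
//   bcnez         $fcc0, .Ltrue
// .Lfalse:
//   addi.w        $rF, $zero, 0
//   b             .Lsink
// .Ltrue:
//   addi.w        $rT, $zero, 1
// .Lsink:
//   ; PHI merges $rF/$rT into the pseudo's result register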
6506
6507static MachineBasicBlock *
6508emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
6509                     const LoongArchSubtarget &Subtarget) {
6510 unsigned InsOp;
6511 unsigned BroadcastOp;
6512 unsigned HalfSize;
6513 switch (MI.getOpcode()) {
6514 default:
6515 llvm_unreachable("Unexpected opcode");
6516 case LoongArch::PseudoXVINSGR2VR_B:
6517 HalfSize = 16;
6518 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6519 InsOp = LoongArch::XVEXTRINS_B;
6520 break;
6521 case LoongArch::PseudoXVINSGR2VR_H:
6522 HalfSize = 8;
6523 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6524 InsOp = LoongArch::XVEXTRINS_H;
6525 break;
6526 }
6527 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6528 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6529 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6530 DebugLoc DL = MI.getDebugLoc();
6531  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6532  // XDst = vector_insert XSrc, Elt, Idx
6533 Register XDst = MI.getOperand(0).getReg();
6534 Register XSrc = MI.getOperand(1).getReg();
6535 Register Elt = MI.getOperand(2).getReg();
6536 unsigned Idx = MI.getOperand(3).getImm();
6537
6538 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6539 Idx < HalfSize) {
6540 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6541 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6542
6543 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6544 .addReg(XSrc, 0, LoongArch::sub_128);
6545 BuildMI(*BB, MI, DL,
6546 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6547 : LoongArch::VINSGR2VR_B),
6548 ScratchSubReg2)
6549 .addReg(ScratchSubReg1)
6550 .addReg(Elt)
6551 .addImm(Idx);
6552
6553 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6554 .addImm(0)
6555 .addReg(ScratchSubReg2)
6556 .addImm(LoongArch::sub_128);
6557 } else {
6558 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6559 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6560
6561 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6562
6563 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6564 .addReg(ScratchReg1)
6565 .addReg(XSrc)
6566 .addImm(Idx >= HalfSize ? 48 : 18);
6567
6568 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6569 .addReg(XSrc)
6570 .addReg(ScratchReg2)
6571 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6572 }
6573
6574 MI.eraseFromParent();
6575 return BB;
6576}
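// Illustrative note (editor's addition): for PseudoXVINSGR2VR_B/H the general
// path is a three-instruction sequence: XVREPLGR2VR broadcasts the GPR element,
// XVPERMI_Q (imm 48 for a high-half index, 18 for a low-half index) lines the
// relevant 128-bit half of the source up with the broadcast, and XVEXTRINS
// writes the one target lane, with imm (Idx % HalfSize) * 17 selecting the same
// lane in source and destination. If the source is an IMPLICIT_DEF and the
// index is in the low half, a single 128-bit VINSGR2VR on the sub_128
// subregister suffices.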
6577
6578static MachineBasicBlock *
6579emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB,
6580                const LoongArchSubtarget &Subtarget) {
6581 assert(Subtarget.hasExtLSX());
6582 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6583 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6584 DebugLoc DL = MI.getDebugLoc();
6585  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6586  Register Dst = MI.getOperand(0).getReg();
6587 Register Src = MI.getOperand(1).getReg();
6588 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6589 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6590 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6591
6592 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6593 BuildMI(*BB, MI, DL,
6594 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6595 : LoongArch::VINSGR2VR_W),
6596 ScratchReg2)
6597 .addReg(ScratchReg1)
6598 .addReg(Src)
6599 .addImm(0);
6600 BuildMI(
6601 *BB, MI, DL,
6602 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6603 ScratchReg3)
6604 .addReg(ScratchReg2);
6605 BuildMI(*BB, MI, DL,
6606 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6607 : LoongArch::VPICKVE2GR_W),
6608 Dst)
6609 .addReg(ScratchReg3)
6610 .addImm(0);
6611
6612 MI.eraseFromParent();
6613 return BB;
6614}
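// Illustrative sketch (editor's addition; register names are placeholders):
// since there is no scalar popcount instruction, PseudoCTPOP on LA64 with LSX
// is expected to expand to:
//
//   vldi          $vrT, 0           # zero a vector register
//   vinsgr2vr.d   $vrT, $rSrc, 0    # move the GPR into lane 0
//   vpcnt.d       $vrT, $vrT        # per-lane population count
//   vpickve2gr.d  $rDst, $vrT, 0    # move the count back to a GPR
//
// LA32 uses the .w variants of the same sequence.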
6615
6616static MachineBasicBlock *
6617emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6618                   const LoongArchSubtarget &Subtarget) {
6619 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6620 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6621 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6622  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6623  Register Dst = MI.getOperand(0).getReg();
6624 Register Src = MI.getOperand(1).getReg();
6625 DebugLoc DL = MI.getDebugLoc();
6626 unsigned EleBits = 8;
6627 unsigned NotOpc = 0;
6628 unsigned MskOpc;
6629
6630 switch (MI.getOpcode()) {
6631 default:
6632 llvm_unreachable("Unexpected opcode");
6633 case LoongArch::PseudoVMSKLTZ_B:
6634 MskOpc = LoongArch::VMSKLTZ_B;
6635 break;
6636 case LoongArch::PseudoVMSKLTZ_H:
6637 MskOpc = LoongArch::VMSKLTZ_H;
6638 EleBits = 16;
6639 break;
6640 case LoongArch::PseudoVMSKLTZ_W:
6641 MskOpc = LoongArch::VMSKLTZ_W;
6642 EleBits = 32;
6643 break;
6644 case LoongArch::PseudoVMSKLTZ_D:
6645 MskOpc = LoongArch::VMSKLTZ_D;
6646 EleBits = 64;
6647 break;
6648 case LoongArch::PseudoVMSKGEZ_B:
6649 MskOpc = LoongArch::VMSKGEZ_B;
6650 break;
6651 case LoongArch::PseudoVMSKEQZ_B:
6652 MskOpc = LoongArch::VMSKNZ_B;
6653 NotOpc = LoongArch::VNOR_V;
6654 break;
6655 case LoongArch::PseudoVMSKNEZ_B:
6656 MskOpc = LoongArch::VMSKNZ_B;
6657 break;
6658 case LoongArch::PseudoXVMSKLTZ_B:
6659 MskOpc = LoongArch::XVMSKLTZ_B;
6660 RC = &LoongArch::LASX256RegClass;
6661 break;
6662 case LoongArch::PseudoXVMSKLTZ_H:
6663 MskOpc = LoongArch::XVMSKLTZ_H;
6664 RC = &LoongArch::LASX256RegClass;
6665 EleBits = 16;
6666 break;
6667 case LoongArch::PseudoXVMSKLTZ_W:
6668 MskOpc = LoongArch::XVMSKLTZ_W;
6669 RC = &LoongArch::LASX256RegClass;
6670 EleBits = 32;
6671 break;
6672 case LoongArch::PseudoXVMSKLTZ_D:
6673 MskOpc = LoongArch::XVMSKLTZ_D;
6674 RC = &LoongArch::LASX256RegClass;
6675 EleBits = 64;
6676 break;
6677 case LoongArch::PseudoXVMSKGEZ_B:
6678 MskOpc = LoongArch::XVMSKGEZ_B;
6679 RC = &LoongArch::LASX256RegClass;
6680 break;
6681 case LoongArch::PseudoXVMSKEQZ_B:
6682 MskOpc = LoongArch::XVMSKNZ_B;
6683 NotOpc = LoongArch::XVNOR_V;
6684 RC = &LoongArch::LASX256RegClass;
6685 break;
6686 case LoongArch::PseudoXVMSKNEZ_B:
6687 MskOpc = LoongArch::XVMSKNZ_B;
6688 RC = &LoongArch::LASX256RegClass;
6689 break;
6690 }
6691
6692 Register Msk = MRI.createVirtualRegister(RC);
6693 if (NotOpc) {
6694 Register Tmp = MRI.createVirtualRegister(RC);
6695 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6696 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6697 .addReg(Tmp, RegState::Kill)
6698 .addReg(Tmp, RegState::Kill);
6699 } else {
6700 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6701 }
6702
6703 if (TRI->getRegSizeInBits(*RC) > 128) {
6704 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6705 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6706 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6707 .addReg(Msk)
6708 .addImm(0);
6709 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6710 .addReg(Msk, RegState::Kill)
6711 .addImm(4);
6712 BuildMI(*BB, MI, DL,
6713 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6714 : LoongArch::BSTRINS_W),
6715 Dst)
6716        .addReg(Lo, RegState::Kill)
6717        .addReg(Hi, RegState::Kill)
6718        .addImm(256 / EleBits - 1)
6719 .addImm(128 / EleBits);
6720 } else {
6721 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6722 .addReg(Msk, RegState::Kill)
6723 .addImm(0);
6724 }
6725
6726 MI.eraseFromParent();
6727 return BB;
6728}
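// Illustrative sketch (editor's addition): for a 256-bit PseudoXVMSKLTZ_B the
// mask halves land in word lanes 0 and 4 of the XVMSKLTZ result and are glued
// back together in a GPR, roughly:
//
//   xvmskltz.b      $xrM, $xrSrc
//   xvpickve2gr.wu  $rLo, $xrM, 0
//   xvpickve2gr.wu  $rHi, $xrM, 4
//   bstrins.d       $rLo, $rHi, 31, 16   # bit range depends on the element width
//
// The 128-bit (V*) pseudos only need a single vpickve2gr.hu of lane 0, and the
// EQZ variants invert the VMSKNZ/XVMSKNZ result with a VNOR/XVNOR before the
// extraction.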
6729
6730static MachineBasicBlock *
6731emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6732                       const LoongArchSubtarget &Subtarget) {
6733 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6734 "Unexpected instruction");
6735
6736 MachineFunction &MF = *BB->getParent();
6737 DebugLoc DL = MI.getDebugLoc();
6738  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
6739  Register LoReg = MI.getOperand(0).getReg();
6740 Register HiReg = MI.getOperand(1).getReg();
6741 Register SrcReg = MI.getOperand(2).getReg();
6742
6743 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6744 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6745 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6746 MI.eraseFromParent(); // The pseudo instruction is gone now.
6747 return BB;
6748}
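// Illustrative sketch (editor's addition): on LA32 with 64-bit FPRs,
// SplitPairF64Pseudo therefore becomes two FPR-to-GPR moves:
//
//   movfr2gr.s   $rLo, $fa0    # low 32 bits of the f64
//   movfrh2gr.s  $rHi, $fa0    # high 32 bits of the f64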
6749
6750static MachineBasicBlock *
6751emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6752                       const LoongArchSubtarget &Subtarget) {
6753 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6754 "Unexpected instruction");
6755
6756 MachineFunction &MF = *BB->getParent();
6757 DebugLoc DL = MI.getDebugLoc();
6758  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
6759  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6760  Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6761 Register DstReg = MI.getOperand(0).getReg();
6762 Register LoReg = MI.getOperand(1).getReg();
6763 Register HiReg = MI.getOperand(2).getReg();
6764
6765 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6766 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6767 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6768 .addReg(TmpReg, RegState::Kill)
6769 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6770 MI.eraseFromParent(); // The pseudo instruction is gone now.
6771 return BB;
6772}
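// Illustrative sketch (editor's addition): BuildPairF64Pseudo is the inverse,
// assembling an f64 from two GPR halves on LA32 with 64-bit FPRs:
//
//   movgr2fr.w   $fa0, $rLo    # write the low 32 bits
//   movgr2frh.w  $fa0, $rHi    # write the high 32 bits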
6773
6774static bool isSelectPseudo(MachineInstr &MI) {
6775  switch (MI.getOpcode()) {
6776 default:
6777 return false;
6778 case LoongArch::Select_GPR_Using_CC_GPR:
6779 return true;
6780 }
6781}
6782
6783static MachineBasicBlock *
6784emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6785                 const LoongArchSubtarget &Subtarget) {
6786 // To "insert" Select_* instructions, we actually have to insert the triangle
6787 // control-flow pattern. The incoming instructions know the destination vreg
6788 // to set, the condition code register to branch on, the true/false values to
6789 // select between, and the condcode to use to select the appropriate branch.
6790 //
6791 // We produce the following control flow:
6792 // HeadMBB
6793 // | \
6794 // | IfFalseMBB
6795 // | /
6796 // TailMBB
6797 //
6798 // When we find a sequence of selects we attempt to optimize their emission
6799 // by sharing the control flow. Currently we only handle cases where we have
6800 // multiple selects with the exact same condition (same LHS, RHS and CC).
6801 // The selects may be interleaved with other instructions if the other
6802 // instructions meet some requirements we deem safe:
6803 // - They are not pseudo instructions.
6804  //  - They are debug instructions, or otherwise
6805  //  - they do not have side-effects, do not access memory and their inputs do
6806 // not depend on the results of the select pseudo-instructions.
6807 // The TrueV/FalseV operands of the selects cannot depend on the result of
6808 // previous selects in the sequence.
6809 // These conditions could be further relaxed. See the X86 target for a
6810 // related approach and more information.
6811
6812 Register LHS = MI.getOperand(1).getReg();
6813 Register RHS;
6814 if (MI.getOperand(2).isReg())
6815 RHS = MI.getOperand(2).getReg();
6816 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6817
6818 SmallVector<MachineInstr *, 4> SelectDebugValues;
6819 SmallSet<Register, 4> SelectDests;
6820 SelectDests.insert(MI.getOperand(0).getReg());
6821
6822 MachineInstr *LastSelectPseudo = &MI;
6823 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6824 SequenceMBBI != E; ++SequenceMBBI) {
6825 if (SequenceMBBI->isDebugInstr())
6826 continue;
6827 if (isSelectPseudo(*SequenceMBBI)) {
6828 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6829 !SequenceMBBI->getOperand(2).isReg() ||
6830 SequenceMBBI->getOperand(2).getReg() != RHS ||
6831 SequenceMBBI->getOperand(3).getImm() != CC ||
6832 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6833 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6834 break;
6835 LastSelectPseudo = &*SequenceMBBI;
6836 SequenceMBBI->collectDebugValues(SelectDebugValues);
6837 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6838 continue;
6839 }
6840 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6841 SequenceMBBI->mayLoadOrStore() ||
6842 SequenceMBBI->usesCustomInsertionHook())
6843 break;
6844 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6845 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6846 }))
6847 break;
6848 }
6849
6850 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6851 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6852 DebugLoc DL = MI.getDebugLoc();
6853  MachineFunction::iterator I = ++BB->getIterator();
6854
6855 MachineBasicBlock *HeadMBB = BB;
6856 MachineFunction *F = BB->getParent();
6857 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6858 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6859
6860 F->insert(I, IfFalseMBB);
6861 F->insert(I, TailMBB);
6862
6863 // Set the call frame size on entry to the new basic blocks.
6864 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6865 IfFalseMBB->setCallFrameSize(CallFrameSize);
6866 TailMBB->setCallFrameSize(CallFrameSize);
6867
6868 // Transfer debug instructions associated with the selects to TailMBB.
6869 for (MachineInstr *DebugInstr : SelectDebugValues) {
6870 TailMBB->push_back(DebugInstr->removeFromParent());
6871 }
6872
6873 // Move all instructions after the sequence to TailMBB.
6874 TailMBB->splice(TailMBB->end(), HeadMBB,
6875 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6876 // Update machine-CFG edges by transferring all successors of the current
6877 // block to the new block which will contain the Phi nodes for the selects.
6878 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6879 // Set the successors for HeadMBB.
6880 HeadMBB->addSuccessor(IfFalseMBB);
6881 HeadMBB->addSuccessor(TailMBB);
6882
6883 // Insert appropriate branch.
6884 if (MI.getOperand(2).isImm())
6885 BuildMI(HeadMBB, DL, TII.get(CC))
6886 .addReg(LHS)
6887 .addImm(MI.getOperand(2).getImm())
6888 .addMBB(TailMBB);
6889 else
6890 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6891
6892 // IfFalseMBB just falls through to TailMBB.
6893 IfFalseMBB->addSuccessor(TailMBB);
6894
6895 // Create PHIs for all of the select pseudo-instructions.
6896 auto SelectMBBI = MI.getIterator();
6897 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
6898 auto InsertionPoint = TailMBB->begin();
6899 while (SelectMBBI != SelectEnd) {
6900 auto Next = std::next(SelectMBBI);
6901 if (isSelectPseudo(*SelectMBBI)) {
6902 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6903 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
6904 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
6905 .addReg(SelectMBBI->getOperand(4).getReg())
6906 .addMBB(HeadMBB)
6907 .addReg(SelectMBBI->getOperand(5).getReg())
6908 .addMBB(IfFalseMBB);
6909 SelectMBBI->eraseFromParent();
6910 }
6911 SelectMBBI = Next;
6912 }
6913
6914 F->getProperties().resetNoPHIs();
6915 return TailMBB;
6916}
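// Illustrative sketch (editor's addition): a single Select_GPR_Using_CC_GPR
// whose CC operand encodes BEQ is expected to expand into the triangle
// described above:
//
// HeadMBB:
//   beq  $lhs, $rhs, TailMBB        # branch opcode taken from the CC operand
// IfFalseMBB:
//   ; empty, falls through
// TailMBB:
//   %res = PHI [ %trueval, HeadMBB ], [ %falseval, IfFalseMBB ]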
6917
6918MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6919 MachineInstr &MI, MachineBasicBlock *BB) const {
6920 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6921 DebugLoc DL = MI.getDebugLoc();
6922
6923 switch (MI.getOpcode()) {
6924 default:
6925 llvm_unreachable("Unexpected instr type to insert");
6926 case LoongArch::DIV_W:
6927 case LoongArch::DIV_WU:
6928 case LoongArch::MOD_W:
6929 case LoongArch::MOD_WU:
6930 case LoongArch::DIV_D:
6931 case LoongArch::DIV_DU:
6932 case LoongArch::MOD_D:
6933 case LoongArch::MOD_DU:
6934 return insertDivByZeroTrap(MI, BB);
6935 break;
6936 case LoongArch::WRFCSR: {
6937 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
6938 LoongArch::FCSR0 + MI.getOperand(0).getImm())
6939 .addReg(MI.getOperand(1).getReg());
6940 MI.eraseFromParent();
6941 return BB;
6942 }
6943 case LoongArch::RDFCSR: {
6944 MachineInstr *ReadFCSR =
6945 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
6946 MI.getOperand(0).getReg())
6947 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
6948 ReadFCSR->getOperand(1).setIsUndef();
6949 MI.eraseFromParent();
6950 return BB;
6951 }
6952 case LoongArch::Select_GPR_Using_CC_GPR:
6953 return emitSelectPseudo(MI, BB, Subtarget);
6954 case LoongArch::BuildPairF64Pseudo:
6955 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
6956 case LoongArch::SplitPairF64Pseudo:
6957 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
6958 case LoongArch::PseudoVBZ:
6959 case LoongArch::PseudoVBZ_B:
6960 case LoongArch::PseudoVBZ_H:
6961 case LoongArch::PseudoVBZ_W:
6962 case LoongArch::PseudoVBZ_D:
6963 case LoongArch::PseudoVBNZ:
6964 case LoongArch::PseudoVBNZ_B:
6965 case LoongArch::PseudoVBNZ_H:
6966 case LoongArch::PseudoVBNZ_W:
6967 case LoongArch::PseudoVBNZ_D:
6968 case LoongArch::PseudoXVBZ:
6969 case LoongArch::PseudoXVBZ_B:
6970 case LoongArch::PseudoXVBZ_H:
6971 case LoongArch::PseudoXVBZ_W:
6972 case LoongArch::PseudoXVBZ_D:
6973 case LoongArch::PseudoXVBNZ:
6974 case LoongArch::PseudoXVBNZ_B:
6975 case LoongArch::PseudoXVBNZ_H:
6976 case LoongArch::PseudoXVBNZ_W:
6977 case LoongArch::PseudoXVBNZ_D:
6978 return emitVecCondBranchPseudo(MI, BB, Subtarget);
6979 case LoongArch::PseudoXVINSGR2VR_B:
6980 case LoongArch::PseudoXVINSGR2VR_H:
6981 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
6982 case LoongArch::PseudoCTPOP:
6983 return emitPseudoCTPOP(MI, BB, Subtarget);
6984 case LoongArch::PseudoVMSKLTZ_B:
6985 case LoongArch::PseudoVMSKLTZ_H:
6986 case LoongArch::PseudoVMSKLTZ_W:
6987 case LoongArch::PseudoVMSKLTZ_D:
6988 case LoongArch::PseudoVMSKGEZ_B:
6989 case LoongArch::PseudoVMSKEQZ_B:
6990 case LoongArch::PseudoVMSKNEZ_B:
6991 case LoongArch::PseudoXVMSKLTZ_B:
6992 case LoongArch::PseudoXVMSKLTZ_H:
6993 case LoongArch::PseudoXVMSKLTZ_W:
6994 case LoongArch::PseudoXVMSKLTZ_D:
6995 case LoongArch::PseudoXVMSKGEZ_B:
6996 case LoongArch::PseudoXVMSKEQZ_B:
6997 case LoongArch::PseudoXVMSKNEZ_B:
6998 return emitPseudoVMSKCOND(MI, BB, Subtarget);
6999 case TargetOpcode::STATEPOINT:
7000 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7001    // while the bl call instruction (to which the statepoint is eventually
7002    // lowered) has an implicit def. This def is early-clobber, as it is set at
7003    // the moment of the call, before any use is read.
7004 // Add this implicit dead def here as a workaround.
7005 MI.addOperand(*MI.getMF(),
7006                  MachineOperand::CreateReg(
7007                      LoongArch::R1, /*isDef*/ true,
7008 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7009 /*isUndef*/ false, /*isEarlyClobber*/ true));
7010 if (!Subtarget.is64Bit())
7011 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7012 return emitPatchPoint(MI, BB);
7013 }
7014}
7015
7016bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
7017    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7018 unsigned *Fast) const {
7019 if (!Subtarget.hasUAL())
7020 return false;
7021
7022 // TODO: set reasonable speed number.
7023 if (Fast)
7024 *Fast = 1;
7025 return true;
7026}
7027
7028const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7029 switch ((LoongArchISD::NodeType)Opcode) {
7030  case LoongArchISD::FIRST_NUMBER:
7031    break;
7032
7033#define NODE_NAME_CASE(node) \
7034 case LoongArchISD::node: \
7035 return "LoongArchISD::" #node;
7036
7037 // TODO: Add more target-dependent nodes later.
7038 NODE_NAME_CASE(CALL)
7039 NODE_NAME_CASE(CALL_MEDIUM)
7040 NODE_NAME_CASE(CALL_LARGE)
7041 NODE_NAME_CASE(RET)
7042 NODE_NAME_CASE(TAIL)
7043 NODE_NAME_CASE(TAIL_MEDIUM)
7044 NODE_NAME_CASE(TAIL_LARGE)
7045 NODE_NAME_CASE(SELECT_CC)
7046 NODE_NAME_CASE(BR_CC)
7047 NODE_NAME_CASE(BRCOND)
7048 NODE_NAME_CASE(SLL_W)
7049 NODE_NAME_CASE(SRA_W)
7050 NODE_NAME_CASE(SRL_W)
7051 NODE_NAME_CASE(BSTRINS)
7052 NODE_NAME_CASE(BSTRPICK)
7053 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7054 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7055 NODE_NAME_CASE(FTINT)
7056 NODE_NAME_CASE(BUILD_PAIR_F64)
7057 NODE_NAME_CASE(SPLIT_PAIR_F64)
7058 NODE_NAME_CASE(REVB_2H)
7059 NODE_NAME_CASE(REVB_2W)
7060 NODE_NAME_CASE(BITREV_4B)
7061 NODE_NAME_CASE(BITREV_8B)
7062 NODE_NAME_CASE(BITREV_W)
7063 NODE_NAME_CASE(ROTR_W)
7064 NODE_NAME_CASE(ROTL_W)
7065 NODE_NAME_CASE(DIV_W)
7066 NODE_NAME_CASE(DIV_WU)
7067 NODE_NAME_CASE(MOD_W)
7068 NODE_NAME_CASE(MOD_WU)
7069 NODE_NAME_CASE(CLZ_W)
7070 NODE_NAME_CASE(CTZ_W)
7071 NODE_NAME_CASE(DBAR)
7072 NODE_NAME_CASE(IBAR)
7073 NODE_NAME_CASE(BREAK)
7074 NODE_NAME_CASE(SYSCALL)
7075 NODE_NAME_CASE(CRC_W_B_W)
7076 NODE_NAME_CASE(CRC_W_H_W)
7077 NODE_NAME_CASE(CRC_W_W_W)
7078 NODE_NAME_CASE(CRC_W_D_W)
7079 NODE_NAME_CASE(CRCC_W_B_W)
7080 NODE_NAME_CASE(CRCC_W_H_W)
7081 NODE_NAME_CASE(CRCC_W_W_W)
7082 NODE_NAME_CASE(CRCC_W_D_W)
7083 NODE_NAME_CASE(CSRRD)
7084 NODE_NAME_CASE(CSRWR)
7085 NODE_NAME_CASE(CSRXCHG)
7086 NODE_NAME_CASE(IOCSRRD_B)
7087 NODE_NAME_CASE(IOCSRRD_H)
7088 NODE_NAME_CASE(IOCSRRD_W)
7089 NODE_NAME_CASE(IOCSRRD_D)
7090 NODE_NAME_CASE(IOCSRWR_B)
7091 NODE_NAME_CASE(IOCSRWR_H)
7092 NODE_NAME_CASE(IOCSRWR_W)
7093 NODE_NAME_CASE(IOCSRWR_D)
7094 NODE_NAME_CASE(CPUCFG)
7095 NODE_NAME_CASE(MOVGR2FCSR)
7096 NODE_NAME_CASE(MOVFCSR2GR)
7097 NODE_NAME_CASE(CACOP_D)
7098 NODE_NAME_CASE(CACOP_W)
7099 NODE_NAME_CASE(VSHUF)
7100 NODE_NAME_CASE(VPICKEV)
7101 NODE_NAME_CASE(VPICKOD)
7102 NODE_NAME_CASE(VPACKEV)
7103 NODE_NAME_CASE(VPACKOD)
7104 NODE_NAME_CASE(VILVL)
7105 NODE_NAME_CASE(VILVH)
7106 NODE_NAME_CASE(VSHUF4I)
7107 NODE_NAME_CASE(VREPLVEI)
7108 NODE_NAME_CASE(VREPLGR2VR)
7109 NODE_NAME_CASE(XVPERMI)
7110 NODE_NAME_CASE(XVPERM)
7111 NODE_NAME_CASE(VPICK_SEXT_ELT)
7112 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7113 NODE_NAME_CASE(VREPLVE)
7114 NODE_NAME_CASE(VALL_ZERO)
7115 NODE_NAME_CASE(VANY_ZERO)
7116 NODE_NAME_CASE(VALL_NONZERO)
7117 NODE_NAME_CASE(VANY_NONZERO)
7118 NODE_NAME_CASE(FRECIPE)
7119 NODE_NAME_CASE(FRSQRTE)
7120 NODE_NAME_CASE(VSLLI)
7121 NODE_NAME_CASE(VSRLI)
7122 NODE_NAME_CASE(VBSLL)
7123 NODE_NAME_CASE(VBSRL)
7124 NODE_NAME_CASE(VLDREPL)
7125 NODE_NAME_CASE(VMSKLTZ)
7126 NODE_NAME_CASE(VMSKGEZ)
7127 NODE_NAME_CASE(VMSKEQZ)
7128 NODE_NAME_CASE(VMSKNEZ)
7129 NODE_NAME_CASE(XVMSKLTZ)
7130 NODE_NAME_CASE(XVMSKGEZ)
7131 NODE_NAME_CASE(XVMSKEQZ)
7132 NODE_NAME_CASE(XVMSKNEZ)
7133 NODE_NAME_CASE(VHADDW)
7134 }
7135#undef NODE_NAME_CASE
7136 return nullptr;
7137}
7138
7139//===----------------------------------------------------------------------===//
7140// Calling Convention Implementation
7141//===----------------------------------------------------------------------===//
7142
7143// Eight general-purpose registers a0-a7 are used for passing integer
7144// arguments, with a0-a1 reused to return values. Generally, the GPRs are used
7145// to pass fixed-point arguments, and floating-point arguments when no FPR is
7146// available or with the soft-float ABI.
7147const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7148 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7149 LoongArch::R10, LoongArch::R11};
7150// Eight floating-point registers fa0-fa7 used for passing floating-point
7151// arguments, and fa0-fa1 are also used to return values.
7152const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7153 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7154 LoongArch::F6, LoongArch::F7};
7155// FPR32 and FPR64 alias each other.
7156const MCPhysReg ArgFPR64s[] = {
7157    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7158 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7159
7160const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7161 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7162 LoongArch::VR6, LoongArch::VR7};
7163
7164const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7165 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7166 LoongArch::XR6, LoongArch::XR7};
7167
7168// Pass a 2*GRLen argument that has been split into two GRLen values through
7169// registers or the stack as necessary.
7170static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7171 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7172 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7173 ISD::ArgFlagsTy ArgFlags2) {
7174 unsigned GRLenInBytes = GRLen / 8;
7175 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7176 // At least one half can be passed via register.
7177 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7178 VA1.getLocVT(), CCValAssign::Full));
7179 } else {
7180 // Both halves must be passed on the stack, with proper alignment.
7181 Align StackAlign =
7182 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7183 State.addLoc(
7184        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
7185                            State.AllocateStack(GRLenInBytes, StackAlign),
7186 VA1.getLocVT(), CCValAssign::Full));
7187 State.addLoc(CCValAssign::getMem(
7188 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7189 LocVT2, CCValAssign::Full));
7190 return false;
7191 }
7192 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7193 // The second half can also be passed via register.
7194 State.addLoc(
7195 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7196 } else {
7197 // The second half is passed via the stack, without additional alignment.
7198 State.addLoc(CCValAssign::getMem(
7199 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7200 LocVT2, CCValAssign::Full));
7201 }
7202 return false;
7203}
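// Worked example (editor's addition, illustrative): on LA32 (GRLen == 32), an
// i64 argument split into two i32 halves is handled here. If only $a7 is still
// free, the first half goes to $a7 and the second half falls through to the
// stack at the next 4-byte slot; if no GPR is left, both halves go to the
// stack, with the first half aligned to the original argument's alignment.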
7204
7205// Implements the LoongArch calling convention. Returns true upon failure.
7206static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
7207                         unsigned ValNo, MVT ValVT,
7208 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7209 CCState &State, bool IsRet, Type *OrigTy) {
7210 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7211  assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7212 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7213 MVT LocVT = ValVT;
7214
7215 // Any return value split into more than two values can't be returned
7216 // directly.
7217 if (IsRet && ValNo > 1)
7218 return true;
7219
7220 // If passing a variadic argument, or if no FPR is available.
7221 bool UseGPRForFloat = true;
7222
7223 switch (ABI) {
7224 default:
7225 llvm_unreachable("Unexpected ABI");
7226 break;
7227  case LoongArchABI::ABI_ILP32F:
7228  case LoongArchABI::ABI_LP64F:
7229  case LoongArchABI::ABI_ILP32D:
7230  case LoongArchABI::ABI_LP64D:
7231    UseGPRForFloat = ArgFlags.isVarArg();
7232 break;
7233  case LoongArchABI::ABI_ILP32S:
7234  case LoongArchABI::ABI_LP64S:
7235    break;
7236 }
7237
7238 // If this is a variadic argument, the LoongArch calling convention requires
7239 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7240 // byte alignment. An aligned register should be used regardless of whether
7241 // the original argument was split during legalisation or not. The argument
7242 // will not be passed by registers if the original type is larger than
7243 // 2*GRLen, so the register alignment rule does not apply.
7244 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7245 if (ArgFlags.isVarArg() &&
7246 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7247 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7248 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7249 // Skip 'odd' register if necessary.
7250 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7251 State.AllocateReg(ArgGPRs);
7252 }
7253
7254 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7255 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7256 State.getPendingArgFlags();
7257
7258 assert(PendingLocs.size() == PendingArgFlags.size() &&
7259 "PendingLocs and PendingArgFlags out of sync");
7260
7261 // FPR32 and FPR64 alias each other.
7262 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7263 UseGPRForFloat = true;
7264
7265 if (UseGPRForFloat && ValVT == MVT::f32) {
7266 LocVT = GRLenVT;
7267 LocInfo = CCValAssign::BCvt;
7268 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7269 LocVT = MVT::i64;
7270 LocInfo = CCValAssign::BCvt;
7271 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7272 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7273 // registers are exhausted.
7274 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7275 // Depending on available argument GPRS, f64 may be passed in a pair of
7276 // GPRs, split between a GPR and the stack, or passed completely on the
7277 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7278 // cases.
7279 MCRegister Reg = State.AllocateReg(ArgGPRs);
7280 if (!Reg) {
7281 int64_t StackOffset = State.AllocateStack(8, Align(8));
7282 State.addLoc(
7283 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7284 return false;
7285 }
7286 LocVT = MVT::i32;
7287 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7288 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7289 if (HiReg) {
7290 State.addLoc(
7291 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7292 } else {
7293 int64_t StackOffset = State.AllocateStack(4, Align(4));
7294 State.addLoc(
7295 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7296 }
7297 return false;
7298 }
7299
7300 // Split arguments might be passed indirectly, so keep track of the pending
7301 // values.
7302 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7303 LocVT = GRLenVT;
7304 LocInfo = CCValAssign::Indirect;
7305 PendingLocs.push_back(
7306 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7307 PendingArgFlags.push_back(ArgFlags);
7308 if (!ArgFlags.isSplitEnd()) {
7309 return false;
7310 }
7311 }
7312
7313 // If the split argument only had two elements, it should be passed directly
7314 // in registers or on the stack.
7315 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7316 PendingLocs.size() <= 2) {
7317 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7318 // Apply the normal calling convention rules to the first half of the
7319 // split argument.
7320 CCValAssign VA = PendingLocs[0];
7321 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7322 PendingLocs.clear();
7323 PendingArgFlags.clear();
7324 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7325 ArgFlags);
7326 }
7327
7328 // Allocate to a register if possible, or else a stack slot.
7329 Register Reg;
7330 unsigned StoreSizeBytes = GRLen / 8;
7331 Align StackAlign = Align(GRLen / 8);
7332
7333 if (ValVT == MVT::f32 && !UseGPRForFloat)
7334 Reg = State.AllocateReg(ArgFPR32s);
7335 else if (ValVT == MVT::f64 && !UseGPRForFloat)
7336 Reg = State.AllocateReg(ArgFPR64s);
7337 else if (ValVT.is128BitVector())
7338 Reg = State.AllocateReg(ArgVRs);
7339 else if (ValVT.is256BitVector())
7340 Reg = State.AllocateReg(ArgXRs);
7341 else
7342 Reg = State.AllocateReg(ArgGPRs);
7343
7344 unsigned StackOffset =
7345 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7346
7347 // If we reach this point and PendingLocs is non-empty, we must be at the
7348 // end of a split argument that must be passed indirectly.
7349 if (!PendingLocs.empty()) {
7350 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7351 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7352 for (auto &It : PendingLocs) {
7353 if (Reg)
7354 It.convertToReg(Reg);
7355 else
7356 It.convertToMem(StackOffset);
7357 State.addLoc(It);
7358 }
7359 PendingLocs.clear();
7360 PendingArgFlags.clear();
7361 return false;
7362 }
7363 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7364         "Expected a GRLenVT at this stage");
7365
7366 if (Reg) {
7367 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7368 return false;
7369 }
7370
7371 // When a floating-point value is passed on the stack, no bit-cast is needed.
7372 if (ValVT.isFloatingPoint()) {
7373 LocVT = ValVT;
7374 LocInfo = CCValAssign::Full;
7375 }
7376
7377 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7378 return false;
7379}
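// Worked example (editor's addition, illustrative): on LA64 a variadic
// 2*GRLen argument such as a 16-byte, 16-byte-aligned long double triggers the
// 'aligned register' rule above. If the next free GPR is odd (say $a3), that
// register is allocated and skipped so the pair starts at an even register
// ($a4/$a5), matching the psABI requirement.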
7380
7381void LoongArchTargetLowering::analyzeInputArgs(
7382 MachineFunction &MF, CCState &CCInfo,
7383 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7384 LoongArchCCAssignFn Fn) const {
7385 FunctionType *FType = MF.getFunction().getFunctionType();
7386 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7387 MVT ArgVT = Ins[i].VT;
7388 Type *ArgTy = nullptr;
7389 if (IsRet)
7390 ArgTy = FType->getReturnType();
7391 else if (Ins[i].isOrigArg())
7392 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7393    LoongArchABI::ABI ABI =
7394        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7395 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7396 CCInfo, IsRet, ArgTy)) {
7397 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7398 << '\n');
7399 llvm_unreachable("");
7400 }
7401 }
7402}
7403
7404void LoongArchTargetLowering::analyzeOutputArgs(
7405 MachineFunction &MF, CCState &CCInfo,
7406 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7407 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7408 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7409 MVT ArgVT = Outs[i].VT;
7410 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7411    LoongArchABI::ABI ABI =
7412        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7413 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7414 CCInfo, IsRet, OrigTy)) {
7415 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7416 << "\n");
7417 llvm_unreachable("");
7418 }
7419 }
7420}
7421
7422// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7423// values.
7424static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
7425                                   const CCValAssign &VA, const SDLoc &DL) {
7426 switch (VA.getLocInfo()) {
7427 default:
7428 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7429 case CCValAssign::Full:
7431 break;
7432 case CCValAssign::BCvt:
7433 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7434 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7435 else
7436 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7437 break;
7438 }
7439 return Val;
7440}
7441
7442static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
7443                                const CCValAssign &VA, const SDLoc &DL,
7444 const ISD::InputArg &In,
7445 const LoongArchTargetLowering &TLI) {
7446  MachineFunction &MF = DAG.getMachineFunction();
7447  MachineRegisterInfo &RegInfo = MF.getRegInfo();
7448  EVT LocVT = VA.getLocVT();
7449 SDValue Val;
7450 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7451 Register VReg = RegInfo.createVirtualRegister(RC);
7452 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7453 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7454
7455 // If input is sign extended from 32 bits, note it for the OptW pass.
7456 if (In.isOrigArg()) {
7457 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7458 if (OrigArg->getType()->isIntegerTy()) {
7459 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7460 // An input zero extended from i31 can also be considered sign extended.
7461 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7462 (BitWidth < 32 && In.Flags.isZExt())) {
7463        LoongArchMachineFunctionInfo *LAFI =
7464            MF.getInfo<LoongArchMachineFunctionInfo>();
7465        LAFI->addSExt32Register(VReg);
7466 }
7467 }
7468 }
7469
7470 return convertLocVTToValVT(DAG, Val, VA, DL);
7471}
7472
7473// The caller is responsible for loading the full value if the argument is
7474// passed with CCValAssign::Indirect.
7475static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
7476                                const CCValAssign &VA, const SDLoc &DL) {
7477  MachineFunction &MF = DAG.getMachineFunction();
7478  MachineFrameInfo &MFI = MF.getFrameInfo();
7479 EVT ValVT = VA.getValVT();
7480 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7481 /*IsImmutable=*/true);
7482 SDValue FIN = DAG.getFrameIndex(
7484
7485 ISD::LoadExtType ExtType;
7486 switch (VA.getLocInfo()) {
7487 default:
7488 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7489 case CCValAssign::Full:
7490  case CCValAssign::Indirect:
7491  case CCValAssign::BCvt:
7492 ExtType = ISD::NON_EXTLOAD;
7493 break;
7494 }
7495 return DAG.getExtLoad(
7496 ExtType, DL, VA.getLocVT(), Chain, FIN,
7497      MachinePointerInfo::getFixedStack(MF, FI), ValVT);
7498}
7499
7500static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
7501                                       const CCValAssign &VA,
7502 const CCValAssign &HiVA,
7503 const SDLoc &DL) {
7504 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7505 "Unexpected VA");
7506  MachineFunction &MF = DAG.getMachineFunction();
7507  MachineFrameInfo &MFI = MF.getFrameInfo();
7508  MachineRegisterInfo &RegInfo = MF.getRegInfo();
7509
7510 assert(VA.isRegLoc() && "Expected register VA assignment");
7511
7512 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7513 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7514 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7515 SDValue Hi;
7516 if (HiVA.isMemLoc()) {
7517 // Second half of f64 is passed on the stack.
7518 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7519 /*IsImmutable=*/true);
7520 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7521 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7522                     MachinePointerInfo::getFixedStack(MF, FI));
7523  } else {
7524 // Second half of f64 is passed in another GPR.
7525 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7526 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7527 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7528 }
7529 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7530}
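// Illustrative example (editor's addition, assumed argument layout): for a
// call like `void f(int, int, int, int, int, int, int, double d)` on LA32
// when `d` is assigned to GPRs, the low word of `d` arrives in $a7 and the
// high word on the stack, so this helper emits one copy-from-reg, one 4-byte
// stack load, and a BUILD_PAIR_F64 to reassemble the f64.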
7531
7532static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
7533                                   const CCValAssign &VA, const SDLoc &DL) {
7534 EVT LocVT = VA.getLocVT();
7535
7536 switch (VA.getLocInfo()) {
7537 default:
7538 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7539 case CCValAssign::Full:
7540 break;
7541 case CCValAssign::BCvt:
7542 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7543 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7544 else
7545 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7546 break;
7547 }
7548 return Val;
7549}
7550
7551static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7552 CCValAssign::LocInfo LocInfo,
7553 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7554 CCState &State) {
7555 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7556 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7557 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7558 static const MCPhysReg GPRList[] = {
7559 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7560 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7561 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7562 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7563 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7564 return false;
7565 }
7566 }
7567
7568 if (LocVT == MVT::f32) {
7569 // Pass in STG registers: F1, F2, F3, F4
7570 // fs0,fs1,fs2,fs3
7571 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7572 LoongArch::F26, LoongArch::F27};
7573 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7574 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7575 return false;
7576 }
7577 }
7578
7579 if (LocVT == MVT::f64) {
7580 // Pass in STG registers: D1, D2, D3, D4
7581 // fs4,fs5,fs6,fs7
7582 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7583 LoongArch::F30_64, LoongArch::F31_64};
7584 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7585 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7586 return false;
7587 }
7588 }
7589
7590 report_fatal_error("No registers left in GHC calling convention");
7591 return true;
7592}
7593
7594// Transform physical registers into virtual registers.
7595SDValue LoongArchTargetLowering::LowerFormalArguments(
7596    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7597 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7598 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7599
7600  MachineFunction &MF = DAG.getMachineFunction();
7601
7602 switch (CallConv) {
7603 default:
7604 llvm_unreachable("Unsupported calling convention");
7605 case CallingConv::C:
7606 case CallingConv::Fast:
7608 break;
7609 case CallingConv::GHC:
7610 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7611 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7612      report_fatal_error(
7613          "GHC calling convention requires the F and D extensions");
7614 }
7615
7616 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7617 MVT GRLenVT = Subtarget.getGRLenVT();
7618 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7619  // Used with varargs to accumulate store chains.
7620 std::vector<SDValue> OutChains;
7621
7622 // Assign locations to all of the incoming arguments.
7623  SmallVector<CCValAssign> ArgLocs;
7624  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7625
7626 if (CallConv == CallingConv::GHC)
7627    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
7628  else
7629 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7630
7631 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7632 CCValAssign &VA = ArgLocs[i];
7633 SDValue ArgValue;
7634 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7635 // case.
7636 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7637 assert(VA.needsCustom());
7638 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7639 } else if (VA.isRegLoc())
7640 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7641 else
7642 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7643 if (VA.getLocInfo() == CCValAssign::Indirect) {
7644 // If the original argument was split and passed by reference, we need to
7645 // load all parts of it here (using the same address).
7646 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7647                                   MachinePointerInfo()));
7648      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7649 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7650 assert(ArgPartOffset == 0);
7651 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7652 CCValAssign &PartVA = ArgLocs[i + 1];
7653 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7654 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7655 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7656 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
7657                                     MachinePointerInfo()));
7658        ++i;
7659 ++InsIdx;
7660 }
7661 continue;
7662 }
7663 InVals.push_back(ArgValue);
7664 }
7665
7666 if (IsVarArg) {
7667    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
7668    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7669 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7670 MachineFrameInfo &MFI = MF.getFrameInfo();
7671 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7672 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7673
7674 // Offset of the first variable argument from stack pointer, and size of
7675 // the vararg save area. For now, the varargs save area is either zero or
7676 // large enough to hold a0-a7.
7677 int VaArgOffset, VarArgsSaveSize;
7678
7679 // If all registers are allocated, then all varargs must be passed on the
7680 // stack and we don't need to save any argregs.
7681 if (ArgRegs.size() == Idx) {
7682 VaArgOffset = CCInfo.getStackSize();
7683 VarArgsSaveSize = 0;
7684 } else {
7685 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7686 VaArgOffset = -VarArgsSaveSize;
7687 }
7688
7689 // Record the frame index of the first variable argument
7690 // which is a value necessary to VASTART.
7691 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7692 LoongArchFI->setVarArgsFrameIndex(FI);
7693
7694 // If saving an odd number of registers then create an extra stack slot to
7695 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7696    // offsets to even-numbered registers remain 2*GRLen-aligned.
7697 if (Idx % 2) {
7698 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7699 true);
7700 VarArgsSaveSize += GRLenInBytes;
7701 }
7702
7703 // Copy the integer registers that may have been used for passing varargs
7704 // to the vararg save area.
7705 for (unsigned I = Idx; I < ArgRegs.size();
7706 ++I, VaArgOffset += GRLenInBytes) {
7707 const Register Reg = RegInfo.createVirtualRegister(RC);
7708 RegInfo.addLiveIn(ArgRegs[I], Reg);
7709 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7710 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7711 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7712 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7713                                   MachinePointerInfo::getFixedStack(MF, FI));
7714      cast<StoreSDNode>(Store.getNode())
7715 ->getMemOperand()
7716 ->setValue((Value *)nullptr);
7717 OutChains.push_back(Store);
7718 }
7719 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7720 }
7721
7722 // All stores are grouped in one node to allow the matching between
7723 // the size of Ins and InVals. This only happens for vararg functions.
7724 if (!OutChains.empty()) {
7725 OutChains.push_back(Chain);
7726 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7727 }
7728
7729 return Chain;
7730}
7731
7732bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
7733  return CI->isTailCall();
7734}
7735
7736// Check if the return value is used as only a return value, as otherwise
7737// we can't perform a tail-call.
7738bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
7739                                                 SDValue &Chain) const {
7740 if (N->getNumValues() != 1)
7741 return false;
7742 if (!N->hasNUsesOfValue(1, 0))
7743 return false;
7744
7745 SDNode *Copy = *N->user_begin();
7746 if (Copy->getOpcode() != ISD::CopyToReg)
7747 return false;
7748
7749 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7750 // isn't safe to perform a tail call.
7751 if (Copy->getGluedNode())
7752 return false;
7753
7754 // The copy must be used by a LoongArchISD::RET, and nothing else.
7755 bool HasRet = false;
7756 for (SDNode *Node : Copy->users()) {
7757 if (Node->getOpcode() != LoongArchISD::RET)
7758 return false;
7759 HasRet = true;
7760 }
7761
7762 if (!HasRet)
7763 return false;
7764
7765 Chain = Copy->getOperand(0);
7766 return true;
7767}
7768
7769// Check whether the call is eligible for tail call optimization.
7770bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7771 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7772 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7773
7774 auto CalleeCC = CLI.CallConv;
7775 auto &Outs = CLI.Outs;
7776 auto &Caller = MF.getFunction();
7777 auto CallerCC = Caller.getCallingConv();
7778
7779 // Do not tail call opt if the stack is used to pass parameters.
7780 if (CCInfo.getStackSize() != 0)
7781 return false;
7782
7783 // Do not tail call opt if any parameters need to be passed indirectly.
7784 for (auto &VA : ArgLocs)
7785 if (VA.getLocInfo() == CCValAssign::Indirect)
7786 return false;
7787
7788 // Do not tail call opt if either caller or callee uses struct return
7789 // semantics.
7790 auto IsCallerStructRet = Caller.hasStructRetAttr();
7791 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7792 if (IsCallerStructRet || IsCalleeStructRet)
7793 return false;
7794
7795 // Do not tail call opt if either the callee or caller has a byval argument.
7796 for (auto &Arg : Outs)
7797 if (Arg.Flags.isByVal())
7798 return false;
7799
7800 // The callee has to preserve all registers the caller needs to preserve.
7801 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7802 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7803 if (CalleeCC != CallerCC) {
7804 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7805 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7806 return false;
7807 }
7808 return true;
7809}
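// Illustrative example (editor's addition): under these rules a call such as
//   int callee(int);
//   int caller(int x) { return callee(x + 1); }
// remains eligible and can be emitted as a tail call, whereas passing a byval
// struct, returning via sret, or spilling arguments to the stack forces a
// normal call sequence.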
7810
7811static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7812  return DAG.getDataLayout().getPrefTypeAlign(
7813 VT.getTypeForEVT(*DAG.getContext()));
7814}
7815
7816// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7817// and output parameter nodes.
7818SDValue
7819LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
7820                                   SmallVectorImpl<SDValue> &InVals) const {
7821 SelectionDAG &DAG = CLI.DAG;
7822 SDLoc &DL = CLI.DL;
7823  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
7824  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7825  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
7826  SDValue Chain = CLI.Chain;
7827 SDValue Callee = CLI.Callee;
7828 CallingConv::ID CallConv = CLI.CallConv;
7829 bool IsVarArg = CLI.IsVarArg;
7830 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7831 MVT GRLenVT = Subtarget.getGRLenVT();
7832 bool &IsTailCall = CLI.IsTailCall;
7833
7834  MachineFunction &MF = DAG.getMachineFunction();
7835
7836 // Analyze the operands of the call, assigning locations to each operand.
7837  SmallVector<CCValAssign> ArgLocs;
7838  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7839
7840 if (CallConv == CallingConv::GHC)
7841 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
7842 else
7843 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
7844
7845 // Check if it's really possible to do a tail call.
7846 if (IsTailCall)
7847 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7848
7849 if (IsTailCall)
7850 ++NumTailCalls;
7851 else if (CLI.CB && CLI.CB->isMustTailCall())
7852 report_fatal_error("failed to perform tail call elimination on a call "
7853 "site marked musttail");
7854
7855 // Get a count of how many bytes are to be pushed on the stack.
7856 unsigned NumBytes = ArgCCInfo.getStackSize();
7857
7858 // Create local copies for byval args.
7859 SmallVector<SDValue> ByValArgs;
7860 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7861 ISD::ArgFlagsTy Flags = Outs[i].Flags;
7862 if (!Flags.isByVal())
7863 continue;
7864
7865 SDValue Arg = OutVals[i];
7866 unsigned Size = Flags.getByValSize();
7867 Align Alignment = Flags.getNonZeroByValAlign();
7868
7869 int FI =
7870 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7871 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7872 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
7873
7874 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7875 /*IsVolatile=*/false,
7876 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
7877                          MachinePointerInfo(), MachinePointerInfo());
7878    ByValArgs.push_back(FIPtr);
7879 }
7880
7881 if (!IsTailCall)
7882 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7883
7884 // Copy argument values to their designated locations.
7885  SmallVector<std::pair<Register, SDValue>> RegsToPass;
7886  SmallVector<SDValue> MemOpChains;
7887 SDValue StackPtr;
7888 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
7889 ++i, ++OutIdx) {
7890 CCValAssign &VA = ArgLocs[i];
7891 SDValue ArgValue = OutVals[OutIdx];
7892 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
7893
7894 // Handle passing f64 on LA32D with a soft float ABI as a special case.
7895 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7896 assert(VA.isRegLoc() && "Expected register VA assignment");
7897 assert(VA.needsCustom());
7898 SDValue SplitF64 =
7899          DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
7900                      DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
7901 SDValue Lo = SplitF64.getValue(0);
7902 SDValue Hi = SplitF64.getValue(1);
7903
7904 Register RegLo = VA.getLocReg();
7905 RegsToPass.push_back(std::make_pair(RegLo, Lo));
7906
7907 // Get the CCValAssign for the Hi part.
7908 CCValAssign &HiVA = ArgLocs[++i];
7909
7910 if (HiVA.isMemLoc()) {
7911 // Second half of f64 is passed on the stack.
7912 if (!StackPtr.getNode())
7913 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7914        SDValue Address =
7915            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7916 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
7917 // Emit the store.
7918 MemOpChains.push_back(DAG.getStore(
7919 Chain, DL, Hi, Address,
7920            MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
7921      } else {
7922 // Second half of f64 is passed in another GPR.
7923 Register RegHigh = HiVA.getLocReg();
7924 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
7925 }
7926 continue;
7927 }
7928
7929 // Promote the value if needed.
7930 // For now, only handle fully promoted and indirect arguments.
7931 if (VA.getLocInfo() == CCValAssign::Indirect) {
7932 // Store the argument in a stack slot and pass its address.
7933 Align StackAlign =
7934 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
7935 getPrefTypeAlign(ArgValue.getValueType(), DAG));
7936 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
7937 // If the original argument was split and passed by reference, we need to
7938 // store the required parts of it here (and pass just one address).
7939 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
7940 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
7941 assert(ArgPartOffset == 0);
7942 // Calculate the total size to store. We don't have access to what we're
7943 // actually storing other than performing the loop and collecting the
7944 // info.
7945      SmallVector<std::pair<SDValue, SDValue>> Parts;
7946      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
7947 SDValue PartValue = OutVals[OutIdx + 1];
7948 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
7949 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7950 EVT PartVT = PartValue.getValueType();
7951
7952 StoredSize += PartVT.getStoreSize();
7953 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
7954 Parts.push_back(std::make_pair(PartValue, Offset));
7955 ++i;
7956 ++OutIdx;
7957 }
7958 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
7959 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
7960 MemOpChains.push_back(
7961 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
7962                       MachinePointerInfo::getFixedStack(MF, FI)));
7963      for (const auto &Part : Parts) {
7964 SDValue PartValue = Part.first;
7965 SDValue PartOffset = Part.second;
7966        SDValue Address =
7967            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
7968 MemOpChains.push_back(
7969 DAG.getStore(Chain, DL, PartValue, Address,
7970                         MachinePointerInfo::getUnknownStack(MF)));
7971      }
7972 ArgValue = SpillSlot;
7973 } else {
7974 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
7975 }
7976
7977 // Use local copy if it is a byval arg.
7978 if (Flags.isByVal())
7979 ArgValue = ByValArgs[j++];
7980
7981 if (VA.isRegLoc()) {
7982 // Queue up the argument copies and emit them at the end.
7983 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
7984 } else {
7985 assert(VA.isMemLoc() && "Argument not register or memory");
7986 assert(!IsTailCall && "Tail call not allowed if stack is used "
7987 "for passing parameters");
7988
7989 // Work out the address of the stack slot.
7990 if (!StackPtr.getNode())
7991 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7992      SDValue Address =
7993          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7994                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
7995
7996 // Emit the store.
7997 MemOpChains.push_back(
7998 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
7999 }
8000 }
8001
8002 // Join the stores, which are independent of one another.
8003 if (!MemOpChains.empty())
8004 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8005
8006 SDValue Glue;
8007
8008 // Build a sequence of copy-to-reg nodes, chained and glued together.
8009 for (auto &Reg : RegsToPass) {
8010 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8011 Glue = Chain.getValue(1);
8012 }
8013
8014 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8015 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8016 // split it and then direct call can be matched by PseudoCALL.
8017  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
8018    const GlobalValue *GV = S->getGlobal();
8019 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8020                           ? LoongArchII::MO_CALL
8021                           : LoongArchII::MO_CALL_PLT;
8022    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8023 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8024 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8025                           ? LoongArchII::MO_CALL
8026                           : LoongArchII::MO_CALL_PLT;
8027    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8028 }
8029
8030 // The first call operand is the chain and the second is the target address.
8031  SmallVector<SDValue> Ops;
8032  Ops.push_back(Chain);
8033 Ops.push_back(Callee);
8034
8035 // Add argument registers to the end of the list so that they are
8036 // known live into the call.
8037 for (auto &Reg : RegsToPass)
8038 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8039
8040 if (!IsTailCall) {
8041 // Add a register mask operand representing the call-preserved registers.
8042 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8043 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8044 assert(Mask && "Missing call preserved mask for calling convention");
8045 Ops.push_back(DAG.getRegisterMask(Mask));
8046 }
8047
8048 // Glue the call to the argument copies, if any.
8049 if (Glue.getNode())
8050 Ops.push_back(Glue);
8051
8052 // Emit the call.
8053 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8054 unsigned Op;
8055 switch (DAG.getTarget().getCodeModel()) {
8056 default:
8057 report_fatal_error("Unsupported code model");
8058 case CodeModel::Small:
8059 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8060 break;
8061 case CodeModel::Medium:
8062 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8063    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8064    break;
8065 case CodeModel::Large:
8066 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8067    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8068    break;
8069 }
8070
8071 if (IsTailCall) {
8072    MF.getFrameInfo().setHasTailCall();
8073    SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8074 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8075 return Ret;
8076 }
8077
8078 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8079 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8080 Glue = Chain.getValue(1);
8081
8082 // Mark the end of the call, which is glued to the call itself.
8083 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8084 Glue = Chain.getValue(1);
8085
8086 // Assign locations to each value returned by this call.
8087 SmallVector<CCValAssign> RVLocs;
8088 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8089 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8090
8091 // Copy all of the result registers out of their specified physreg.
8092 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8093 auto &VA = RVLocs[i];
8094 // Copy the value out.
8095 SDValue RetValue =
8096 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8097 // Glue the RetValue to the end of the call sequence.
8098 Chain = RetValue.getValue(1);
8099 Glue = RetValue.getValue(2);
8100
8101 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8102 assert(VA.needsCustom());
8103 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8104 MVT::i32, Glue);
8105 Chain = RetValue2.getValue(1);
8106 Glue = RetValue2.getValue(2);
8107 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8108 RetValue, RetValue2);
8109 } else
8110 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8111
8112 InVals.push_back(RetValue);
8113 }
8114
8115 return Chain;
8116}
8117
8118bool LoongArchTargetLowering::CanLowerReturn(
8119 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8120 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8121 const Type *RetTy) const {
8122 SmallVector<CCValAssign, 16> RVLocs;
8123 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8124
8125 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8126 LoongArchABI::ABI ABI =
8127 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8128 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8129 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8130 return false;
8131 }
8132 return true;
8133}
8134
8135SDValue LoongArchTargetLowering::LowerReturn(
8136 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8137 const SmallVectorImpl<ISD::OutputArg> &Outs,
8138 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8139 SelectionDAG &DAG) const {
8140 // Stores the assignment of the return value to a location.
8141 SmallVector<CCValAssign, 16> RVLocs;
8142
8143 // Info about the registers and stack slot.
8144 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8145 *DAG.getContext());
8146
8147 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8148 nullptr, CC_LoongArch);
8149 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8150 report_fatal_error("GHC functions return void only");
8151 SDValue Glue;
8152 SmallVector<SDValue, 4> RetOps(1, Chain);
8153
8154 // Copy the result values into the output registers.
8155 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8156 SDValue Val = OutVals[OutIdx];
8157 CCValAssign &VA = RVLocs[i];
8158 assert(VA.isRegLoc() && "Can only return in registers!");
8159
8160 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8161 // Handle returning f64 on LA32D with a soft float ABI.
8162 assert(VA.isRegLoc() && "Expected return via registers");
8163 assert(VA.needsCustom());
8164 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8165 DAG.getVTList(MVT::i32, MVT::i32), Val);
8166 SDValue Lo = SplitF64.getValue(0);
8167 SDValue Hi = SplitF64.getValue(1);
8168 Register RegLo = VA.getLocReg();
8169 Register RegHi = RVLocs[++i].getLocReg();
8170
8171 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8172 Glue = Chain.getValue(1);
8173 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8174 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8175 Glue = Chain.getValue(1);
8176 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8177 } else {
8178 // Handle a 'normal' return.
8179 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8180 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8181
8182 // Guarantee that all emitted copies are stuck together.
8183 Glue = Chain.getValue(1);
8184 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8185 }
8186 }
8187
8188 RetOps[0] = Chain; // Update chain.
8189
8190 // Add the glue node if we have it.
8191 if (Glue.getNode())
8192 RetOps.push_back(Glue);
8193
8194 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8195}
8196
8197bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
8198 EVT VT) const {
8199 if (!Subtarget.hasExtLSX())
8200 return false;
8201
8202 if (VT == MVT::f32) {
8203 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8204 return (masked == 0x3e000000 || masked == 0x40000000);
8205 }
8206
8207 if (VT == MVT::f64) {
8208 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8209 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8210 }
8211
8212 return false;
8213}
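// [Added commentary, not in the upstream source] This predicate is queried by
// isFPImmLegal below. The intent appears to be that, when LSX is available,
// immediates whose masked bit pattern matches one of the accepted templates
// can be materialized cheaply (e.g. via a vldi-style vector immediate) rather
// than loaded from the constant pool; the masks keep only the fields such an
// immediate form can encode.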
8214
8215bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8216 bool ForCodeSize) const {
8217 // TODO: Maybe need more checks here after vector extension is supported.
8218 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8219 return false;
8220 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8221 return false;
8222 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8223}
8224
8225bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
8226 return true;
8227}
8228
8229bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
8230 return true;
8231}
8232
8233bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8234 const Instruction *I) const {
8235 if (!Subtarget.is64Bit())
8236 return isa<LoadInst>(I) || isa<StoreInst>(I);
8237
8238 if (isa<LoadInst>(I))
8239 return true;
8240
8241 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8242 // require fences because we can use amswap_db.[w/d].
8243 Type *Ty = I->getOperand(0)->getType();
8244 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8245 unsigned Size = Ty->getIntegerBitWidth();
8246 return (Size == 8 || Size == 16);
8247 }
8248
8249 return false;
8250}
8251
8252EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
8253 LLVMContext &Context,
8254 EVT VT) const {
8255 if (!VT.isVector())
8256 return getPointerTy(DL);
8257 return VT.changeVectorElementTypeToInteger();
8258}
8259
8260bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
8261 // TODO: Support vectors.
8262 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
8263}
8264
8265bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
8266 const CallInst &I,
8267 MachineFunction &MF,
8268 unsigned Intrinsic) const {
8269 switch (Intrinsic) {
8270 default:
8271 return false;
8272 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8273 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8274 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8275 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8276 Info.opc = ISD::INTRINSIC_W_CHAIN;
8277 Info.memVT = MVT::i32;
8278 Info.ptrVal = I.getArgOperand(0);
8279 Info.offset = 0;
8280 Info.align = Align(4);
8281 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
8282 MachineMemOperand::MOVolatile;
8283 return true;
8284 // TODO: Add more Intrinsics later.
8285 }
8286}
8287
8288// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
8289// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
8290// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
8291// regression, we need to implement it manually.
8292void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
8293 AtomicRMWInst::BinOp Op = AI->getOperation();
8294
8295 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
8296 Op == AtomicRMWInst::And) &&
8297 "Unable to expand");
8298 unsigned MinWordSize = 4;
8299
8300 IRBuilder<> Builder(AI);
8301 LLVMContext &Ctx = Builder.getContext();
8302 const DataLayout &DL = AI->getDataLayout();
8303 Type *ValueType = AI->getType();
8304 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8305
8306 Value *Addr = AI->getPointerOperand();
8307 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8308 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8309
8310 Value *AlignedAddr = Builder.CreateIntrinsic(
8311 Intrinsic::ptrmask, {PtrTy, IntTy},
8312 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8313 "AlignedAddr");
8314
8315 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8316 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8317 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8318 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8319 Value *Mask = Builder.CreateShl(
8320 ConstantInt::get(WordType,
8321 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8322 ShiftAmt, "Mask");
8323 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8324 Value *ValOperand_Shifted =
8325 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8326 ShiftAmt, "ValOperand_Shifted");
8327 Value *NewOperand;
8328 if (Op == AtomicRMWInst::And)
8329 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8330 else
8331 NewOperand = ValOperand_Shifted;
8332
8333 AtomicRMWInst *NewAI =
8334 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8335 AI->getOrdering(), AI->getSyncScopeID());
8336
8337 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8338 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8339 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8340 AI->replaceAllUsesWith(FinalOldResult);
8341 AI->eraseFromParent();
8342}
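// [Added commentary, not in the upstream source] The transformation above
// widens a sub-word atomicrmw to a 32-bit one on the containing aligned word.
// For example, an i8 `atomicrmw or` at byte offset 1 (LoongArch is
// little-endian) produces:
//   ShiftAmt           = 8
//   Mask               = 0x0000ff00
//   ValOperand_Shifted = zext(val) << 8
// The or/xor forms can use ValOperand_Shifted directly because the bits
// outside Mask are zero; the and form ORs in Inv_Mask so the untouched bytes
// of the word are preserved. The old value is recovered by shifting the result
// right by ShiftAmt and truncating back to the original type.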
8343
8344TargetLowering::AtomicExpansionKind
8345LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
8346 // TODO: Add more AtomicRMWInst that need to be extended.
8347
8348 // Since floating-point operation requires a non-trivial set of data
8349 // operations, use CmpXChg to expand.
8350 if (AI->isFloatingPointOperation() ||
8351 AI->getOperation() == AtomicRMWInst::UIncWrap ||
8352 AI->getOperation() == AtomicRMWInst::UDecWrap ||
8353 AI->getOperation() == AtomicRMWInst::USubCond ||
8354 AI->getOperation() == AtomicRMWInst::USubSat)
8355 return AtomicExpansionKind::CmpXChg;
8356
8357 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8358 (AI->getOperation() == AtomicRMWInst::Xchg ||
8359 AI->getOperation() == AtomicRMWInst::Add ||
8360 AI->getOperation() == AtomicRMWInst::Sub)) {
8361 return AtomicExpansionKind::None;
8362 }
8363
8364 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8365 if (Subtarget.hasLAMCAS()) {
8366 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8367 AI->getOperation() == AtomicRMWInst::Or ||
8368 AI->getOperation() == AtomicRMWInst::Xor))
8369 return AtomicExpansionKind::Expand;
8370 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8371 return AtomicExpansionKind::CmpXChg;
8372 }
8373
8374 if (Size == 8 || Size == 16)
8375 return AtomicExpansionKind::MaskedIntrinsic;
8376 return AtomicExpansionKind::None;
8377}
8378
8379static Intrinsic::ID
8380getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
8381 AtomicRMWInst::BinOp BinOp) {
8382 if (GRLen == 64) {
8383 switch (BinOp) {
8384 default:
8385 llvm_unreachable("Unexpected AtomicRMW BinOp");
8386 case AtomicRMWInst::Xchg:
8387 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8388 case AtomicRMWInst::Add:
8389 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8390 case AtomicRMWInst::Sub:
8391 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8392 case AtomicRMWInst::Nand:
8393 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8394 case AtomicRMWInst::UMax:
8395 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8396 case AtomicRMWInst::UMin:
8397 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8398 case AtomicRMWInst::Max:
8399 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8400 case AtomicRMWInst::Min:
8401 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8402 // TODO: support other AtomicRMWInst.
8403 }
8404 }
8405
8406 if (GRLen == 32) {
8407 switch (BinOp) {
8408 default:
8409 llvm_unreachable("Unexpected AtomicRMW BinOp");
8410 case AtomicRMWInst::Xchg:
8411 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8412 case AtomicRMWInst::Add:
8413 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8414 case AtomicRMWInst::Sub:
8415 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8416 case AtomicRMWInst::Nand:
8417 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8418 case AtomicRMWInst::UMax:
8419 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8420 case AtomicRMWInst::UMin:
8421 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8422 case AtomicRMWInst::Max:
8423 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8424 case AtomicRMWInst::Min:
8425 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8426 // TODO: support other AtomicRMWInst.
8427 }
8428 }
8429
8430 llvm_unreachable("Unexpected GRLen\n");
8431}
8432
8433TargetLowering::AtomicExpansionKind
8434LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
8435 AtomicCmpXchgInst *CI) const {
8436
8437 if (Subtarget.hasLAMCAS())
8438 return AtomicExpansionKind::None;
8439
8440 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
8441 if (Size == 8 || Size == 16)
8442 return AtomicExpansionKind::MaskedIntrinsic;
8443 return AtomicExpansionKind::None;
8444}
8445
8446Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
8447 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8448 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8449 unsigned GRLen = Subtarget.getGRLen();
8450 AtomicOrdering FailOrd = CI->getFailureOrdering();
8451 Value *FailureOrdering =
8452 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8453 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8454 if (GRLen == 64) {
8455 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8456 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8457 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8458 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8459 }
8460 Type *Tys[] = {AlignedAddr->getType()};
8461 Value *Result = Builder.CreateIntrinsic(
8462 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8463 if (GRLen == 64)
8464 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8465 return Result;
8466}
8467
8468Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
8469 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8470 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8471 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8472 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8473 // mask, as this produces better code than the LL/SC loop emitted by
8474 // int_loongarch_masked_atomicrmw_xchg.
8475 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8476 isa<ConstantInt>(AI->getValOperand())) {
8477 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
8478 if (CVal->isZero())
8479 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8480 Builder.CreateNot(Mask, "Inv_Mask"),
8481 AI->getAlign(), Ord);
8482 if (CVal->isMinusOne())
8483 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8484 AI->getAlign(), Ord);
8485 }
8486
8487 unsigned GRLen = Subtarget.getGRLen();
8488 Value *Ordering =
8489 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8490 Type *Tys[] = {AlignedAddr->getType()};
8491 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
8492 AI->getModule(),
8493 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
8494
8495 if (GRLen == 64) {
8496 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8497 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8498 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8499 }
8500
8501 Value *Result;
8502
8503 // Must pass the shift amount needed to sign extend the loaded value prior
8504 // to performing a signed comparison for min/max. ShiftAmt is the number of
8505 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8506 // is the number of bits to left+right shift the value in order to
8507 // sign-extend.
8508 if (AI->getOperation() == AtomicRMWInst::Min ||
8509 AI->getOperation() == AtomicRMWInst::Max) {
8510 const DataLayout &DL = AI->getDataLayout();
8511 unsigned ValWidth =
8512 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8513 Value *SextShamt =
8514 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8515 Result = Builder.CreateCall(LlwOpScwLoop,
8516 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8517 } else {
8518 Result =
8519 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8520 }
8521
8522 if (GRLen == 64)
8523 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8524 return Result;
8525}
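// [Added commentary, not in the upstream source] On the min/max path above,
// the LL/SC loop intrinsic must sign-extend the loaded field before the signed
// comparison. For example, an i8 field at byte offset 1 with GRLen == 64 gives
// ShiftAmt == 8 and ValWidth == 8, so SextShamt == 64 - 8 - 8 == 48: shifting
// the loaded word left by 48 puts the field's sign bit at bit 63, and an
// arithmetic right shift by 48 brings it back into place with the sign
// replicated above it.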
8526
8527bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
8528 const MachineFunction &MF, EVT VT) const {
8529 VT = VT.getScalarType();
8530
8531 if (!VT.isSimple())
8532 return false;
8533
8534 switch (VT.getSimpleVT().SimpleTy) {
8535 case MVT::f32:
8536 case MVT::f64:
8537 return true;
8538 default:
8539 break;
8540 }
8541
8542 return false;
8543}
8544
8545Register LoongArchTargetLowering::getExceptionPointerRegister(
8546 const Constant *PersonalityFn) const {
8547 return LoongArch::R4;
8548}
8549
8550Register LoongArchTargetLowering::getExceptionSelectorRegister(
8551 const Constant *PersonalityFn) const {
8552 return LoongArch::R5;
8553}
8554
8555//===----------------------------------------------------------------------===//
8556// Target Optimization Hooks
8557//===----------------------------------------------------------------------===//
8558
8559static int getEstimateRefinementSteps(EVT VT,
8560 const LoongArchSubtarget &Subtarget) {
8561 // The FRECIPE feature's estimate instructions have a relative accuracy of
8562 // 2^-14. IEEE float has 23 fraction bits and double has 52 fraction bits.
8563 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8564 return RefinementSteps;
8565}
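// [Added commentary, not in the upstream source] Each Newton-Raphson
// refinement step roughly doubles the number of correct bits. Starting from
// ~14 bits of accuracy, one step gives ~28 bits (enough for f32's 24-bit
// significand) and two steps give ~56 bits (enough for f64's 53-bit
// significand), which is why f64 uses 2 steps and f32 uses 1.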
8566
8567SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
8568 SelectionDAG &DAG, int Enabled,
8569 int &RefinementSteps,
8570 bool &UseOneConstNR,
8571 bool Reciprocal) const {
8572 if (Subtarget.hasFrecipe()) {
8573 SDLoc DL(Operand);
8574 EVT VT = Operand.getValueType();
8575
8576 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8577 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8578 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8579 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8580 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8581
8582 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8583 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8584
8585 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8586 if (Reciprocal)
8587 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8588
8589 return Estimate;
8590 }
8591 }
8592
8593 return SDValue();
8594}
8595
8596SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
8597 SelectionDAG &DAG,
8598 int Enabled,
8599 int &RefinementSteps) const {
8600 if (Subtarget.hasFrecipe()) {
8601 SDLoc DL(Operand);
8602 EVT VT = Operand.getValueType();
8603
8604 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8605 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8606 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8607 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8608 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8609
8610 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8611 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8612
8613 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8614 }
8615 }
8616
8617 return SDValue();
8618}
8619
8620//===----------------------------------------------------------------------===//
8621// LoongArch Inline Assembly Support
8622//===----------------------------------------------------------------------===//
8623
8624LoongArchTargetLowering::ConstraintType
8625LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8626 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8627 //
8628 // 'f': A floating-point register (if available).
8629 // 'k': A memory operand whose address is formed by a base register and
8630 // (optionally scaled) index register.
8631 // 'l': A signed 16-bit constant.
8632 // 'm': A memory operand whose address is formed by a base register and
8633 // offset that is suitable for use in instructions with the same
8634 // addressing mode as st.w and ld.w.
8635 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8636 // instruction)
8637 // 'I': A signed 12-bit constant (for arithmetic instructions).
8638 // 'J': Integer zero.
8639 // 'K': An unsigned 12-bit constant (for logic instructions).
8640 // "ZB": An address that is held in a general-purpose register. The offset is
8641 // zero.
8642 // "ZC": A memory operand whose address is formed by a base register and
8643 // offset that is suitable for use in instructions with the same
8644 // addressing mode as ll.w and sc.w.
8645 if (Constraint.size() == 1) {
8646 switch (Constraint[0]) {
8647 default:
8648 break;
8649 case 'f':
8650 case 'q':
8651 return C_RegisterClass;
8652 case 'l':
8653 case 'I':
8654 case 'J':
8655 case 'K':
8656 return C_Immediate;
8657 case 'k':
8658 return C_Memory;
8659 }
8660 }
8661
8662 if (Constraint == "ZC" || Constraint == "ZB")
8663 return C_Memory;
8664
8665 // 'm' is handled here.
8666 return TargetLowering::getConstraintType(Constraint);
8667}
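// [Added usage sketch, not in the upstream source] The immediate constraints
// above correspond to inline assembly such as
//   int r;
//   asm("addi.w %0, %1, %2" : "=r"(r) : "r"(a), "I"(-1));    // signed 12-bit
//   asm("ori %0, %1, %2"    : "=r"(r) : "r"(a), "K"(0xfff)); // unsigned 12-bit
// where an out-of-range constant is rejected by
// LowerAsmOperandForConstraint further down.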
8668
8669InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8670 StringRef ConstraintCode) const {
8671 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8672 .Case("k", InlineAsm::ConstraintCode::k)
8673 .Case("ZB", InlineAsm::ConstraintCode::ZB)
8674 .Case("ZC", InlineAsm::ConstraintCode::ZC)
8675 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8676}
8677
8678std::pair<unsigned, const TargetRegisterClass *>
8679LoongArchTargetLowering::getRegForInlineAsmConstraint(
8680 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8681 // First, see if this is a constraint that directly corresponds to a LoongArch
8682 // register class.
8683 if (Constraint.size() == 1) {
8684 switch (Constraint[0]) {
8685 case 'r':
8686 // TODO: Support fixed vectors up to GRLen?
8687 if (VT.isVector())
8688 break;
8689 return std::make_pair(0U, &LoongArch::GPRRegClass);
8690 case 'q':
8691 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8692 case 'f':
8693 if (Subtarget.hasBasicF() && VT == MVT::f32)
8694 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8695 if (Subtarget.hasBasicD() && VT == MVT::f64)
8696 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8697 if (Subtarget.hasExtLSX() &&
8698 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8699 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8700 if (Subtarget.hasExtLASX() &&
8701 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8702 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8703 break;
8704 default:
8705 break;
8706 }
8707 }
8708
8709 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8710 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8711 // constraints while the official register name is prefixed with a '$'. So we
8712 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8713 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
8714 // case insensitive, so no need to convert the constraint to upper case here.
8715 //
8716 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8717 // decode the usage of register name aliases into their official names. And
8718 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8719 // official register names.
8720 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8721 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8722 bool IsFP = Constraint[2] == 'f';
8723 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8724 std::pair<unsigned, const TargetRegisterClass *> R;
8725 R = TargetLowering::getRegForInlineAsmConstraint(
8726 TRI, join_items("", Temp.first, Temp.second), VT);
8727 // Match those names to the widest floating point register type available.
8728 if (IsFP) {
8729 unsigned RegNo = R.first;
8730 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8731 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8732 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8733 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8734 }
8735 }
8736 }
8737 return R;
8738 }
8739
8740 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8741}
8742
8743void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8744 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8745 SelectionDAG &DAG) const {
8746 // Currently only support length 1 constraints.
8747 if (Constraint.size() == 1) {
8748 switch (Constraint[0]) {
8749 case 'l':
8750 // Validate & create a 16-bit signed immediate operand.
8751 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8752 uint64_t CVal = C->getSExtValue();
8753 if (isInt<16>(CVal))
8754 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8755 Subtarget.getGRLenVT()));
8756 }
8757 return;
8758 case 'I':
8759 // Validate & create a 12-bit signed immediate operand.
8760 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8761 uint64_t CVal = C->getSExtValue();
8762 if (isInt<12>(CVal))
8763 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8764 Subtarget.getGRLenVT()));
8765 }
8766 return;
8767 case 'J':
8768 // Validate & create an integer zero operand.
8769 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8770 if (C->getZExtValue() == 0)
8771 Ops.push_back(
8772 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8773 return;
8774 case 'K':
8775 // Validate & create a 12-bit unsigned immediate operand.
8776 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8777 uint64_t CVal = C->getZExtValue();
8778 if (isUInt<12>(CVal))
8779 Ops.push_back(
8780 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8781 }
8782 return;
8783 default:
8784 break;
8785 }
8786 }
8787 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8788}
8789
8790#define GET_REGISTER_MATCHER
8791#include "LoongArchGenAsmMatcher.inc"
8792
8793Register
8794LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8795 const MachineFunction &MF) const {
8796 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8797 std::string NewRegName = Name.second.str();
8798 Register Reg = MatchRegisterAltName(NewRegName);
8799 if (!Reg)
8800 Reg = MatchRegisterName(NewRegName);
8801 if (!Reg)
8802 return Reg;
8803 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8804 if (!ReservedRegs.test(Reg))
8805 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8806 StringRef(RegName) + "\"."));
8807 return Reg;
8808}
8809
8810bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8811 EVT VT, SDValue C) const {
8812 // TODO: Support vectors.
8813 if (!VT.isScalarInteger())
8814 return false;
8815
8816 // Omit the optimization if the data size exceeds GRLen.
8817 if (VT.getSizeInBits() > Subtarget.getGRLen())
8818 return false;
8819
8820 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8821 const APInt &Imm = ConstNode->getAPIntValue();
8822 // Break MUL into (SLLI + ADD/SUB) or ALSL.
8823 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8824 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8825 return true;
8826 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8827 if (ConstNode->hasOneUse() &&
8828 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8829 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8830 return true;
8831 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8832 // in which the immediate has two set bits. Or Break (MUL x, imm)
8833 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8834 // equals (1 << s0) - (1 << s1).
8835 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8836 unsigned Shifts = Imm.countr_zero();
8837 // Reject immediates which can be composed via a single LUI.
8838 if (Shifts >= 12)
8839 return false;
8840 // Reject multiplications that can be optimized to
8841 // (SLLI (ALSL x, x, 1/2/3/4), s).
8842 APInt ImmPop = Imm.ashr(Shifts);
8843 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8844 return false;
8845 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8846 // since it needs one more instruction than the other 3 cases.
8847 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8848 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8849 (ImmSmall - Imm).isPowerOf2())
8850 return true;
8851 }
8852 }
8853
8854 return false;
8855}
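// [Added worked examples, not in the upstream source] With the checks above:
//   x * 17    -> (x << 4) + x          (Imm - 1 is a power of 2: a single ALSL)
//   x * 6     -> (x << 1) + (x << 2)   (Imm - 2 is a power of 2: ALSL of an SLLI)
//   x * 16640 -> (x << 14) + (x << 8)  (16640 = 2^14 + 2^8: ADD of two SLLIs)
//   x * 5120  -> not decomposed here: 5120 == 5 << 10, which is already
//                matched as SLLI(ALSL(x, x, 2), 10) by the ImmPop check.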
8856
8857bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8858 const AddrMode &AM,
8859 Type *Ty, unsigned AS,
8860 Instruction *I) const {
8861 // LoongArch has four basic addressing modes:
8862 // 1. reg
8863 // 2. reg + 12-bit signed offset
8864 // 3. reg + 14-bit signed offset left-shifted by 2
8865 // 4. reg1 + reg2
8866 // TODO: Add more checks after supporting the vector extension.
8867
8868 // No global is ever allowed as a base.
8869 if (AM.BaseGV)
8870 return false;
8871
8872 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8873 // with `UAL` feature.
8874 if (!isInt<12>(AM.BaseOffs) &&
8875 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8876 return false;
8877
8878 switch (AM.Scale) {
8879 case 0:
8880 // "r+i" or just "i", depending on HasBaseReg.
8881 break;
8882 case 1:
8883 // "r+r+i" is not allowed.
8884 if (AM.HasBaseReg && AM.BaseOffs)
8885 return false;
8886 // Otherwise we have "r+r" or "r+i".
8887 break;
8888 case 2:
8889 // "2*r+r" or "2*r+i" is not allowed.
8890 if (AM.HasBaseReg || AM.BaseOffs)
8891 return false;
8892 // Allow "2*r" as "r+r".
8893 break;
8894 default:
8895 return false;
8896 }
8897
8898 return true;
8899}
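// [Added commentary, not in the upstream source] In instruction terms the
// accepted modes correspond to, e.g., `ld.w $a0, $a1, 2040` (reg + si12),
// `ldptr.w $a0, $a1, 4096` (reg + si14 << 2, accepted here only with UAL), and
// `ldx.w $a0, $a1, $a2` (reg + reg). A base register combined with both an
// index register and a non-zero offset is rejected.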
8900
8901bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8902 return isInt<12>(Imm);
8903}
8904
8905bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8906 return isInt<12>(Imm);
8907}
8908
8909bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
8910 // Zexts are free if they can be combined with a load.
8911 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8912 // poorly with type legalization of compares preferring sext.
8913 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8914 EVT MemVT = LD->getMemoryVT();
8915 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8916 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8917 LD->getExtensionType() == ISD::ZEXTLOAD))
8918 return true;
8919 }
8920
8921 return TargetLowering::isZExtFree(Val, VT2);
8922}
8923
8924bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
8925 EVT DstVT) const {
8926 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8927}
8928
8929bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
8930 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
8931}
8932
8933bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
8934 // TODO: Support vectors.
8935 if (Y.getValueType().isVector())
8936 return false;
8937
8938 return !isa<ConstantSDNode>(Y);
8939}
8940
8941ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
8942 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
8943 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
8944}
8945
8946bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
8947 Type *Ty, bool IsSigned) const {
8948 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
8949 return true;
8950
8951 return IsSigned;
8952}
8953
8954bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8955 // Return false to suppress the unnecessary extensions if the LibCall
8956 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
8957 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
8958 Type.getSizeInBits() < Subtarget.getGRLen()))
8959 return false;
8960 return true;
8961}
8962
8963// memcpy and other memory intrinsics typically try to use wider load/store
8964// if the source/dest is aligned and the copy size is large enough. We therefore
8965// want to align such objects passed to memory intrinsics.
8966bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
8967 unsigned &MinSize,
8968 Align &PrefAlign) const {
8969 if (!isa<MemIntrinsic>(CI))
8970 return false;
8971
8972 if (Subtarget.is64Bit()) {
8973 MinSize = 8;
8974 PrefAlign = Align(8);
8975 } else {
8976 MinSize = 4;
8977 PrefAlign = Align(4);
8978 }
8979
8980 return true;
8981}
8982
8991
8992bool LoongArchTargetLowering::splitValueIntoRegisterParts(
8993 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
8994 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
8995 bool IsABIRegCopy = CC.has_value();
8996 EVT ValueVT = Val.getValueType();
8997
8998 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8999 PartVT == MVT::f32) {
9000 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9001 // nan, and cast to f32.
9002 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9003 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9004 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9005 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9006 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9007 Parts[0] = Val;
9008 return true;
9009 }
9010
9011 return false;
9012}
9013
9014SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9015 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9016 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9017 bool IsABIRegCopy = CC.has_value();
9018
9019 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9020 PartVT == MVT::f32) {
9021 SDValue Val = Parts[0];
9022
9023 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9024 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9025 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9026 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9027 return Val;
9028 }
9029
9030 return SDValue();
9031}
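// [Added commentary, not in the upstream source] The two hooks above implement
// the f16/bf16-in-FPR32 ABI convention: on the outgoing side the 16-bit value
// is placed in the low half of an f32 register with the upper 16 bits set to
// all ones (so the container is a NaN when viewed as f32); on the incoming
// side only the low 16 bits are read back, so the padding is ignored.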
9032
9033MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9034 CallingConv::ID CC,
9035 EVT VT) const {
9036 // Use f32 to pass f16.
9037 if (VT == MVT::f16 && Subtarget.hasBasicF())
9038 return MVT::f32;
9039
9040 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
9041}
9042
9043unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9044 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9045 // Use f32 to pass f16.
9046 if (VT == MVT::f16 && Subtarget.hasBasicF())
9047 return 1;
9048
9049 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
9050}
9051
9052bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
9053 SDValue Op, const APInt &OriginalDemandedBits,
9054 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9055 unsigned Depth) const {
9056 EVT VT = Op.getValueType();
9057 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9058 unsigned Opc = Op.getOpcode();
9059 switch (Opc) {
9060 default:
9061 break;
9062 case LoongArchISD::VMSKLTZ:
9063 case LoongArchISD::XVMSKLTZ: {
9064 SDValue Src = Op.getOperand(0);
9065 MVT SrcVT = Src.getSimpleValueType();
9066 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9067 unsigned NumElts = SrcVT.getVectorNumElements();
9068
9069 // If we don't need the sign bits at all just return zero.
9070 if (OriginalDemandedBits.countr_zero() >= NumElts)
9071 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9072
9073 // Only demand the vector elements of the sign bits we need.
9074 APInt KnownUndef, KnownZero;
9075 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9076 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9077 TLO, Depth + 1))
9078 return true;
9079
9080 Known.Zero = KnownZero.zext(BitWidth);
9081 Known.Zero.setHighBits(BitWidth - NumElts);
9082
9083 // [X]VMSKLTZ only uses the MSB from each vector element.
9084 KnownBits KnownSrc;
9085 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9086 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9087 Depth + 1))
9088 return true;
9089
9090 if (KnownSrc.One[SrcBits - 1])
9091 Known.One.setLowBits(NumElts);
9092 else if (KnownSrc.Zero[SrcBits - 1])
9093 Known.Zero.setLowBits(NumElts);
9094
9095 // Attempt to avoid multi-use ops if we don't need anything from it.
9096 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
9097 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9098 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9099 return false;
9100 }
9101 }
9102
9103 return TargetLowering::SimplifyDemandedBitsForTargetNode(
9104 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9105}
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:388
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
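The LowerOperation/ReplaceNodeResults overrides above follow the standard TargetLowering shape: a switch over the opcodes that were marked Custom, forwarding to per-node helpers. A schematic sketch (class and helper names are illustrative, not the literal LoongArch switch):
SDValue MyTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  default:
    report_fatal_error("unexpected custom lowering");
  }
}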
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
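MVT is the simple, compile-time value type used throughout this file; the accessors above compose as in this small sketch:
MVT VT = MVT::v4i32;                          // one of the 128-bit LSX types
assert(VT.isVector() && VT.is128BitVector());
unsigned NumElts = VT.getVectorNumElements(); // 4
MVT EltVT = VT.getVectorElementType();        // MVT::i32
MVT HalfVT = VT.getHalfNumVectorElementsVT(); // MVT::v2i32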
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
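addReg/addImm/addMBB chain off BuildMI (documented further below) when a custom inserter expands a pseudo into real machine instructions. A hedged sketch, assuming TII, MBB, MI, DL, the source registers and TargetMBB come from the surrounding expansion code:
// Emit a conditional branch while expanding a pseudo.
BuildMI(MBB, MI, DL, TII->get(LoongArch::BEQ))
    .addReg(SrcReg1)
    .addReg(SrcReg2)
    .addMBB(TargetMBB);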
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
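These SDValue/SDNode accessors are the basic vocabulary of PerformDAGCombine-style code. A minimal hedged pattern match:
// Match (and X, shifted-mask) as a candidate for a bitfield-extract rewrite.
if (N->getOpcode() == ISD::AND && N->getNumOperands() == 2) {
  SDValue X = N->getOperand(0);
  if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (isShiftedMask_64(C->getZExtValue()) && X.hasOneUse()) {
      // ... build the replacement node here ...
    }
}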
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
ArrayRef< int > getMask() const
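The SelectionDAG construction helpers above compose in the obvious way; as a hedged illustration of the common getConstant/getNode/getSetCC/getSelect idioms (DAG, DL, VT, X, A and B are assumed, and real code would ask getSetCCResultType instead of hard-coding MVT::i1):
SDValue One = DAG.getConstant(1, DL, VT);
SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, One);
SDValue Cond = DAG.getSetCC(DL, MVT::i1, Sum,
                            DAG.getConstant(0, DL, VT), ISD::SETNE);
SDValue Res = DAG.getSelect(DL, VT, Cond, A, B);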
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
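These TargetLoweringBase configuration hooks are exactly what the LoongArchTargetLowering constructor at the top of this file calls to describe the ISA. A generic hedged sketch of the usual constructor shape (register class and target names are illustrative):
addRegisterClass(MVT::i64, &MyTarget::GPRRegClass);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);    // no native select_cc
setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i8, Legal);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
computeRegisterProperties(Subtarget.getRegisterInfo());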
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:130
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
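getSetCCSwappedOperands and getSetCCInverse are the usual tools when a combine canonicalizes comparisons. A hedged sketch, assuming N is an ISD::SETCC node and DAG/DL are in scope:
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);
SDValue NewCmp = DAG.getSetCC(DL, N->getValueType(0),
                              N->getOperand(1), N->getOperand(0), Swapped);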
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1714
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1721
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
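These immediate-range predicates are what hooks such as isLegalAddImmediate and isLegalICmpImmediate above typically reduce to. A hedged sketch (the helper names are illustrative; they refer to the 12-bit immediate fields of addi/andi-style instructions and to the contiguous-bit-range form used by bitfield extracts):
static bool fitsAddi(int64_t Imm) { return isInt<12>(Imm); }
static bool fitsAndi(uint64_t Imm) { return isUInt<12>(Imm); }
static bool looksLikeBitfieldMask(uint64_t Imm) { return isShiftedMask_64(Imm); }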
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
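EVT mirrors the MVT interface for possibly-extended types and is what most hooks in this file take. A short hedged sketch combining the queries above (VT and Context are assumed to be supplied by the caller):
if (VT.isVector() && VT.is256BitVector()) {
  EVT HalfVT = VT.getHalfNumVectorElementsVT(Context); // e.g. v8i32 -> v4i32
  EVT EltVT = VT.getVectorElementType();
  uint64_t EltBits = EltVT.getFixedSizeInBits();
  // ... a split-in-half lowering would use HalfVT/EltBits here ...
}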
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...