1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
23#include "llvm/IR/GlobalAlias.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/IntrinsicsS390.h"
30#include <cctype>
31#include <optional>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "systemz-lower"
36
37// Temporarily let this be disabled by default until all known problems
38// related to argument extensions are fixed.
40 "argext-abi-check", cl::init(false),
41 cl::desc("Verify that narrow int args are properly extended per the "
42 "SystemZ ABI."));
43
44namespace {
45// Represents information about a comparison.
46struct Comparison {
47 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
48 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
49 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
50
51 // The operands to the comparison.
52 SDValue Op0, Op1;
53
54 // Chain if this is a strict floating-point comparison.
55 SDValue Chain;
56
57 // The opcode that should be used to compare Op0 and Op1.
58 unsigned Opcode;
59
60 // A SystemZICMP value. Only used for integer comparisons.
61 unsigned ICmpType;
62
63 // The mask of CC values that Opcode can produce.
64 unsigned CCValid;
65
66 // The mask of CC values for which the original condition is true.
67 unsigned CCMask;
68};
69} // end anonymous namespace
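// Editor's sketch, not in the original source: how the Comparison struct
// above is typically filled in for a signed 32-bit equality test. The
// enumerator names (SystemZISD::ICMP, SystemZICMP::SignedOnly,
// SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ) are assumed from the usual
// SystemZ backend headers; the real population happens in the
// comparison-lowering helpers later in this file.
//
//   Comparison C(Op0, Op1, SDValue());      // No chain: non-strict compare.
//   C.Opcode = SystemZISD::ICMP;            // Integer compare node.
//   C.ICmpType = SystemZICMP::SignedOnly;   // Signed comparison semantics.
//   C.CCValid = SystemZ::CCMASK_ICMP;       // CC values ICMP can produce.
//   C.CCMask = SystemZ::CCMASK_CMP_EQ;      // Condition holds on "equal".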
70
71// Classify VT as either 32 or 64 bit.
72static bool is32Bit(EVT VT) {
73 switch (VT.getSimpleVT().SimpleTy) {
74 case MVT::i32:
75 return true;
76 case MVT::i64:
77 return false;
78 default:
79 llvm_unreachable("Unsupported type");
80 }
81}
82
83// Return a version of MachineOperand that can be safely used before the
84// final use.
86 if (Op.isReg())
87 Op.setIsKill(false);
88 return Op;
89}
90
92 const SystemZSubtarget &STI)
93 : TargetLowering(TM), Subtarget(STI) {
94 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
95
96 auto *Regs = STI.getSpecialRegisters();
97
98 // Set up the register classes.
99 if (Subtarget.hasHighWord())
100 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
101 else
102 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
103 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
104 if (!useSoftFloat()) {
105 if (Subtarget.hasVector()) {
106 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
107 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
108 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
109 } else {
110 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
111 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
112 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
113 }
114 if (Subtarget.hasVectorEnhancements1())
115 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
116 else
117 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
118
119 if (Subtarget.hasVector()) {
120 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
121 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
122 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
126 }
127
128 if (Subtarget.hasVector())
129 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
130 }
131
132 // Compute derived properties from the register classes
134
135 // Set up special registers.
136 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
137
 138 // TODO: It may be better to default to latency-oriented scheduling; however,
139 // LLVM's current latency-oriented scheduler can't handle physreg definitions
140 // such as SystemZ has with CC, so set this to the register-pressure
141 // scheduler, because it can.
143
146
148
149 // Instructions are strings of 2-byte aligned 2-byte values.
151 // For performance reasons we prefer 16-byte alignment.
153
154 // Handle operations that are handled in a similar way for all types.
155 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
156 I <= MVT::LAST_FP_VALUETYPE;
157 ++I) {
159 if (isTypeLegal(VT)) {
160 // Lower SET_CC into an IPM-based sequence.
164
165 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
167
168 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
171 }
172 }
173
174 // Expand jump table branches as address arithmetic followed by an
175 // indirect jump.
177
178 // Expand BRCOND into a BR_CC (see above).
180
181 // Handle integer types except i128.
182 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
183 I <= MVT::LAST_INTEGER_VALUETYPE;
184 ++I) {
186 if (isTypeLegal(VT) && VT != MVT::i128) {
188
189 // Expand individual DIV and REMs into DIVREMs.
196
197 // Support addition/subtraction with overflow.
200
201 // Support addition/subtraction with carry.
204
205 // Support carry in as value rather than glue.
208
209 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
210 // available, or if the operand is constant.
212
213 // Use POPCNT on z196 and above.
214 if (Subtarget.hasPopulationCount())
216 else
218
219 // No special instructions for these.
222
223 // Use *MUL_LOHI where possible instead of MULH*.
228
229 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
230 // unsigned on z10 (only z196 and above have native support for
231 // unsigned conversions).
238 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
239 auto OpAction =
240 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
241 setOperationAction(Op, VT, OpAction);
242 }
243 }
244 }
245
246 // Handle i128 if legal.
247 if (isTypeLegal(MVT::i128)) {
248 // No special instructions for these.
255
256 // We may be able to use VSLDB/VSLD/VSRD for these.
259
260 // No special instructions for these before z17.
261 if (!Subtarget.hasVectorEnhancements3()) {
271 } else {
272 // Even if we do have a legal 128-bit multiply, we do not
273 // want 64-bit multiply-high operations to use it.
276 }
277
278 // Support addition/subtraction with carry.
283
284 // Use VPOPCT and add up partial results.
286
287 // Additional instructions available with z17.
288 if (Subtarget.hasVectorEnhancements3()) {
289 setOperationAction(ISD::ABS, MVT::i128, Legal);
290
292 MVT::i128, Legal);
293 }
294 }
295
296 // These need custom handling in order to handle the f16 conversions.
305
306 // Type legalization will convert 8- and 16-bit atomic operations into
307 // forms that operate on i32s (but still keeping the original memory VT).
308 // Lower them into full i32 operations.
320
 321 // Whether or not i128 is a legal type, we need to custom lower
322 // the atomic operations in order to exploit SystemZ instructions.
327
328 // Mark sign/zero extending atomic loads as legal, which will make
329 // DAGCombiner fold extensions into atomic loads if possible.
331 {MVT::i8, MVT::i16, MVT::i32}, Legal);
333 {MVT::i8, MVT::i16}, Legal);
335 MVT::i8, Legal);
336
337 // We can use the CC result of compare-and-swap to implement
338 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
342
344
345 // Traps are legal, as we will convert them to "j .+2".
346 setOperationAction(ISD::TRAP, MVT::Other, Legal);
347
348 // We have native support for a 64-bit CTLZ, via FLOGR.
352
353 // On z17 we have native support for a 64-bit CTTZ.
354 if (Subtarget.hasMiscellaneousExtensions4()) {
358 }
359
360 // On z15 we have native support for a 64-bit CTPOP.
361 if (Subtarget.hasMiscellaneousExtensions3()) {
364 }
365
366 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
368
369 // Expand 128 bit shifts without using a libcall.
373
374 // Also expand 256 bit shifts if i128 is a legal type.
375 if (isTypeLegal(MVT::i128)) {
379 }
380
381 // Handle bitcast from fp128 to i128.
382 if (!isTypeLegal(MVT::i128))
384
385 // We have native instructions for i8, i16 and i32 extensions, but not i1.
387 for (MVT VT : MVT::integer_valuetypes()) {
391 }
392
393 // Handle the various types of symbolic address.
399
400 // We need to handle dynamic allocations specially because of the
401 // 160-byte area at the bottom of the stack.
404
407
408 // Handle prefetches with PFD or PFDRL.
410
411 // Handle readcyclecounter with STCKF.
413
415 // Assume by default that all vector operations need to be expanded.
416 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
417 if (getOperationAction(Opcode, VT) == Legal)
418 setOperationAction(Opcode, VT, Expand);
419
420 // Likewise all truncating stores and extending loads.
421 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
422 setTruncStoreAction(VT, InnerVT, Expand);
425 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
426 }
427
428 if (isTypeLegal(VT)) {
429 // These operations are legal for anything that can be stored in a
430 // vector register, even if there is no native support for the format
431 // as such. In particular, we can do these for v4f32 even though there
432 // are no specific instructions for that format.
438
439 // Likewise, except that we need to replace the nodes with something
440 // more specific.
443 }
444 }
445
446 // Handle integer vector types.
448 if (isTypeLegal(VT)) {
449 // These operations have direct equivalents.
454 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
458 }
459 if (Subtarget.hasVectorEnhancements3() &&
460 VT != MVT::v16i8 && VT != MVT::v8i16) {
465 }
470 if (Subtarget.hasVectorEnhancements1())
472 else
476
477 // Convert a GPR scalar to a vector by inserting it into element 0.
479
480 // Use a series of unpacks for extensions.
483
484 // Detect shifts/rotates by a scalar amount and convert them into
485 // V*_BY_SCALAR.
490
491 // Add ISD::VECREDUCE_ADD as custom in order to implement
492 // it with VZERO+VSUM
494
495 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
496 // and inverting the result as necessary.
498
500 Legal);
501 }
502 }
503
504 if (Subtarget.hasVector()) {
505 // There should be no need to check for float types other than v2f64
506 // since <2 x f32> isn't a legal type.
515
524 }
525
526 if (Subtarget.hasVectorEnhancements2()) {
535
544 }
545
546 // Handle floating-point types.
547 if (!useSoftFloat()) {
548 // Promote all f16 operations to float, with some exceptions below.
549 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
550 setOperationAction(Opc, MVT::f16, Promote);
552 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
553 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
554 setTruncStoreAction(VT, MVT::f16, Expand);
555 }
557 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
562 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
563 setOperationAction(Op, MVT::f16, Legal);
564 }
565
566 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
567 I <= MVT::LAST_FP_VALUETYPE;
568 ++I) {
570 if (isTypeLegal(VT) && VT != MVT::f16) {
571 // We can use FI for FRINT.
573
574 // We can use the extended form of FI for other rounding operations.
575 if (Subtarget.hasFPExtension()) {
582 }
583
584 // No special instructions for these.
590
591 // Special treatment.
593
594 // Handle constrained floating-point operations.
603 if (Subtarget.hasFPExtension()) {
610 }
611
612 // Extension from f16 needs libcall.
615 }
616 }
617
618 // Handle floating-point vector types.
619 if (Subtarget.hasVector()) {
620 // Scalar-to-vector conversion is just a subreg.
623
624 // Some insertions and extractions can be done directly but others
625 // need to go via integers.
630
631 // These operations have direct equivalents.
632 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
633 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
634 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
635 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
636 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
637 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
638 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
639 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
640 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
643 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
647
648 // Handle constrained floating-point operations.
662
667 if (Subtarget.hasVectorEnhancements1()) {
670 }
671 }
672
673 // The vector enhancements facility 1 has instructions for these.
674 if (Subtarget.hasVectorEnhancements1()) {
675 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
676 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
677 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
678 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
679 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
680 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
681 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
682 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
683 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
686 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
690
695
700
705
710
715
716 // Handle constrained floating-point operations.
730 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
731 MVT::v4f32, MVT::v2f64 }) {
736 }
737 }
738
739 // We only have fused f128 multiply-addition on vector registers.
740 if (!Subtarget.hasVectorEnhancements1()) {
743 }
744
745 // We don't have a copysign instruction on vector registers.
746 if (Subtarget.hasVectorEnhancements1())
748
749 // Needed so that we don't try to implement f128 constant loads using
 750 // a load-and-extend of an f80 constant (in cases where the constant
751 // would fit in an f80).
752 for (MVT VT : MVT::fp_valuetypes())
753 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
754
 755 // We don't have extending load instructions on vector registers.
756 if (Subtarget.hasVectorEnhancements1()) {
757 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
758 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
759 }
760
761 // Floating-point truncation and stores need to be done separately.
762 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
763 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
764 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
765
766 // We have 64-bit FPR<->GPR moves, but need special handling for
767 // 32-bit forms.
768 if (!Subtarget.hasVector()) {
771 }
772
773 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
774 // structure, but VAEND is a no-op.
778
779 if (Subtarget.isTargetzOS()) {
780 // Handle address space casts between mixed sized pointers.
783 }
784
786
787 // Codes for which we want to perform some z-specific combinations.
791 ISD::LOAD,
804 ISD::SRL,
805 ISD::SRA,
806 ISD::MUL,
807 ISD::SDIV,
808 ISD::UDIV,
809 ISD::SREM,
810 ISD::UREM,
813
814 // Handle intrinsics.
817
 818 // We're not using SJLJ for exception handling, but these nodes are
 819 // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
822
823 // We want to use MVC in preference to even a single load/store pair.
824 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
826
827 // The main memset sequence is a byte store followed by an MVC.
828 // Two STC or MV..I stores win over that, but the kind of fused stores
829 // generated by target-independent code don't when the byte value is
830 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
831 // than "STC;MVC". Handle the choice in target-specific code instead.
832 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
834
835 // Default to having -disable-strictnode-mutation on
836 IsStrictFPEnabled = true;
837}
838
840 return Subtarget.hasSoftFloat();
841}
842
844 LLVMContext &, EVT VT) const {
845 if (!VT.isVector())
846 return MVT::i32;
848}
849
851 const MachineFunction &MF, EVT VT) const {
852 if (useSoftFloat())
853 return false;
854
855 VT = VT.getScalarType();
856
857 if (!VT.isSimple())
858 return false;
859
860 switch (VT.getSimpleVT().SimpleTy) {
861 case MVT::f32:
862 case MVT::f64:
863 return true;
864 case MVT::f128:
865 return Subtarget.hasVectorEnhancements1();
866 default:
867 break;
868 }
869
870 return false;
871}
872
873// Return true if the constant can be generated with a vector instruction,
874// such as VGM, VGMB or VREPI.
876 const SystemZSubtarget &Subtarget) {
877 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
878 if (!Subtarget.hasVector() ||
879 (isFP128 && !Subtarget.hasVectorEnhancements1()))
880 return false;
881
882 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
883 // preferred way of creating all-zero and all-one vectors so give it
884 // priority over other methods below.
885 unsigned Mask = 0;
886 unsigned I = 0;
887 for (; I < SystemZ::VectorBytes; ++I) {
888 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
889 if (Byte == 0xff)
890 Mask |= 1ULL << I;
891 else if (Byte != 0)
892 break;
893 }
894 if (I == SystemZ::VectorBytes) {
896 OpVals.push_back(Mask);
898 return true;
899 }
900
901 if (SplatBitSize > 64)
902 return false;
903
904 auto TryValue = [&](uint64_t Value) -> bool {
905 // Try VECTOR REPLICATE IMMEDIATE
906 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
907 if (isInt<16>(SignedValue)) {
908 OpVals.push_back(((unsigned) SignedValue));
911 SystemZ::VectorBits / SplatBitSize);
912 return true;
913 }
914 // Try VECTOR GENERATE MASK
915 unsigned Start, End;
916 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
917 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
918 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
 919 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
920 OpVals.push_back(Start - (64 - SplatBitSize));
921 OpVals.push_back(End - (64 - SplatBitSize));
924 SystemZ::VectorBits / SplatBitSize);
925 return true;
926 }
927 return false;
928 };
929
930 // First try assuming that any undefined bits above the highest set bit
931 // and below the lowest set bit are 1s. This increases the likelihood of
932 // being able to use a sign-extended element value in VECTOR REPLICATE
933 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
934 uint64_t SplatBitsZ = SplatBits.getZExtValue();
935 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
936 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
937 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
938 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
939 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
940 if (TryValue(SplatBitsZ | Upper | Lower))
941 return true;
942
943 // Now try assuming that any undefined bits between the first and
944 // last defined set bits are set. This increases the chances of
945 // using a non-wraparound mask.
946 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
947 return TryValue(SplatBitsZ | Middle);
948}
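// Editor's worked example, not in the original source, of the selection
// logic above. For a 128-bit constant whose low four bytes are 0xff and the
// rest zero, the VGBM loop sets Mask == 0b1111 and succeeds immediately.
// For a v8i16 splat of 0x0005 the loop instead hits a byte that is neither
// 0x00 nor 0xff and falls through; TryValue then sign-extends 0x0005 within
// SplatBitSize == 16 to +5, which fits in a signed 16-bit immediate, so the
// VECTOR REPLICATE IMMEDIATE path is taken with OpVals == {5} and
// SystemZ::VectorBits / 16 == 8 elements.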
949
951 if (IntImm.isSingleWord()) {
952 IntBits = APInt(128, IntImm.getZExtValue());
953 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
954 } else
955 IntBits = IntImm;
956 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
957
958 // Find the smallest splat.
959 SplatBits = IntImm;
960 unsigned Width = SplatBits.getBitWidth();
961 while (Width > 8) {
962 unsigned HalfSize = Width / 2;
963 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
964 APInt LowValue = SplatBits.trunc(HalfSize);
965
966 // If the two halves do not match, stop here.
967 if (HighValue != LowValue || 8 > HalfSize)
968 break;
969
970 SplatBits = HighValue;
971 Width = HalfSize;
972 }
973 SplatUndef = 0;
974 SplatBitSize = Width;
975}
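// Editor's worked example, not in the original source, of the halving loop
// above: an IntImm whose sixteen bytes repeat the halfword 0x0001 splits
// 128 -> 64 -> 32 -> 16 with matching halves at each step; at width 16 the
// two 8-bit halves (0x00 and 0x01) differ, so the loop stops with
// SplatBits == 0x0001 and SplatBitSize == 16, i.e. the smallest replicated
// element is a halfword.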
976
978 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
979 bool HasAnyUndefs;
980
981 // Get IntBits by finding the 128 bit splat.
982 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
983 true);
984
985 // Get SplatBits by finding the 8 bit or greater splat.
986 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
987 true);
988}
989
991 bool ForCodeSize) const {
992 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
993 if (Imm.isZero() || Imm.isNegZero())
994 return true;
995
997}
998
1001 MachineBasicBlock *MBB) const {
1002 DebugLoc DL = MI.getDebugLoc();
1003 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1004 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1005
1006 MachineFunction *MF = MBB->getParent();
1008
1009 const BasicBlock *BB = MBB->getBasicBlock();
1011
1012 Register DstReg = MI.getOperand(0).getReg();
1013 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1014 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1015 (void)TRI;
1016 Register MainDstReg = MRI.createVirtualRegister(RC);
1017 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1018
1019 MVT PVT = getPointerTy(MF->getDataLayout());
1020 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
 1021 // For v = setjmp(buf), we generate the code below.
1022 // Algorithm:
1023 //
1024 // ---------
1025 // | thisMBB |
1026 // ---------
1027 // |
1028 // ------------------------
1029 // | |
1030 // ---------- ---------------
1031 // | mainMBB | | restoreMBB |
1032 // | v = 0 | | v = 1 |
1033 // ---------- ---------------
1034 // | |
1035 // -------------------------
1036 // |
1037 // -----------------------------
1038 // | sinkMBB |
1039 // | phi(v_mainMBB,v_restoreMBB) |
1040 // -----------------------------
1041 // thisMBB:
1042 // buf[FPOffset] = Frame Pointer if hasFP.
1043 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1044 // buf[BCOffset] = Backchain value if building with -mbackchain.
1045 // buf[SPOffset] = Stack Pointer.
 1046 // buf[LPOffset] = We never write this slot; gcc always stores R13 here.
1047 // SjLjSetup restoreMBB
1048 // mainMBB:
1049 // v_main = 0
1050 // sinkMBB:
1051 // v = phi(v_main, v_restore)
1052 // restoreMBB:
1053 // v_restore = 1
1054
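// Editor's note, not in the original source: assuming 64-bit pointers
// (PVT.getStoreSize() == 8), the slot offsets computed below work out to
// buf[0] = frame pointer, buf[8] = restore label, buf[16] = backchain,
// buf[24] = stack pointer, and buf[32] = the R13 slot (LPOffset), which is
// only read by the longjmp lowering further down.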
1055 MachineBasicBlock *ThisMBB = MBB;
1056 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1057 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1058 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1059
1060 MF->insert(I, MainMBB);
1061 MF->insert(I, SinkMBB);
1062 MF->push_back(RestoreMBB);
1063 RestoreMBB->setMachineBlockAddressTaken();
1064
1066
1067 // Transfer the remainder of BB and its successor edges to sinkMBB.
1068 SinkMBB->splice(SinkMBB->begin(), MBB,
1069 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1071
1072 // thisMBB:
1073 const int64_t FPOffset = 0; // Slot 1.
1074 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1075 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1076 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1077
1078 // Buf address.
1079 Register BufReg = MI.getOperand(1).getReg();
1080
1081 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1082 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1083
1084 // Prepare IP for longjmp.
1085 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1086 .addMBB(RestoreMBB);
1087 // Store IP for return from jmp, slot 2, offset = 1.
1088 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1089 .addReg(LabelReg)
1090 .addReg(BufReg)
1091 .addImm(LabelOffset)
1092 .addReg(0);
1093
1094 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1095 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1096 if (HasFP) {
1097 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1098 .addReg(SpecialRegs->getFramePointerRegister())
1099 .addReg(BufReg)
1100 .addImm(FPOffset)
1101 .addReg(0);
1102 }
1103
1104 // Store SP.
1105 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1106 .addReg(SpecialRegs->getStackPointerRegister())
1107 .addReg(BufReg)
1108 .addImm(SPOffset)
1109 .addReg(0);
1110
 1111 // Slot 3 (Offset = 2): Backchain value (if building with -mbackchain).
1112 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1113 if (BackChain) {
1114 Register BCReg = MRI.createVirtualRegister(PtrRC);
1115 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1116 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1117 .addReg(SpecialRegs->getStackPointerRegister())
1118 .addImm(TFL->getBackchainOffset(*MF))
1119 .addReg(0);
1120
1121 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1122 .addReg(BCReg)
1123 .addReg(BufReg)
1124 .addImm(BCOffset)
1125 .addReg(0);
1126 }
1127
1128 // Setup.
1129 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1130 .addMBB(RestoreMBB);
1131
1132 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1133 MIB.addRegMask(RegInfo->getNoPreservedMask());
1134
1135 ThisMBB->addSuccessor(MainMBB);
1136 ThisMBB->addSuccessor(RestoreMBB);
1137
1138 // mainMBB:
1139 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1140 MainMBB->addSuccessor(SinkMBB);
1141
1142 // sinkMBB:
1143 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1144 .addReg(MainDstReg)
1145 .addMBB(MainMBB)
1146 .addReg(RestoreDstReg)
1147 .addMBB(RestoreMBB);
1148
1149 // restoreMBB.
1150 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1151 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1152 RestoreMBB->addSuccessor(SinkMBB);
1153
1154 MI.eraseFromParent();
1155
1156 return SinkMBB;
1157}
1158
1161 MachineBasicBlock *MBB) const {
1162
1163 DebugLoc DL = MI.getDebugLoc();
1164 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1165
1166 MachineFunction *MF = MBB->getParent();
1168
1169 MVT PVT = getPointerTy(MF->getDataLayout());
1170 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1171 Register BufReg = MI.getOperand(0).getReg();
1172 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1173 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1174
1175 Register Tmp = MRI.createVirtualRegister(RC);
1176 Register BCReg = MRI.createVirtualRegister(RC);
1177
1179
1180 const int64_t FPOffset = 0;
1181 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1182 const int64_t BCOffset = 2 * PVT.getStoreSize();
1183 const int64_t SPOffset = 3 * PVT.getStoreSize();
1184 const int64_t LPOffset = 4 * PVT.getStoreSize();
1185
1186 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1187 .addReg(BufReg)
1188 .addImm(LabelOffset)
1189 .addReg(0);
1190
1191 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1192 SpecialRegs->getFramePointerRegister())
1193 .addReg(BufReg)
1194 .addImm(FPOffset)
1195 .addReg(0);
1196
 1197 // We are restoring R13 even though we never stored it in setjmp from llvm,
 1198 // as gcc always stores R13 in builtin_setjmp. We could have mixed code with
 1199 // gcc setjmp and llvm longjmp.
1200 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1201 .addReg(BufReg)
1202 .addImm(LPOffset)
1203 .addReg(0);
1204
1205 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1206 if (BackChain) {
1207 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1208 .addReg(BufReg)
1209 .addImm(BCOffset)
1210 .addReg(0);
1211 }
1212
1213 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1214 SpecialRegs->getStackPointerRegister())
1215 .addReg(BufReg)
1216 .addImm(SPOffset)
1217 .addReg(0);
1218
1219 if (BackChain) {
1220 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1221 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1222 .addReg(BCReg)
1223 .addReg(SpecialRegs->getStackPointerRegister())
1224 .addImm(TFL->getBackchainOffset(*MF))
1225 .addReg(0);
1226 }
1227
1228 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1229
1230 MI.eraseFromParent();
1231 return MBB;
1232}
1233
1234/// Returns true if stack probing through inline assembly is requested.
1236 // If the function specifically requests inline stack probes, emit them.
1237 if (MF.getFunction().hasFnAttribute("probe-stack"))
1238 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1239 "inline-asm";
1240 return false;
1241}
1242
1246}
1247
1251}
1252
1255 // Don't expand subword operations as they require special treatment.
1256 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1258
1259 // Don't expand if there is a target instruction available.
1260 if (Subtarget.hasInterlockedAccess1() &&
1261 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1268
1270}
1271
1273 // We can use CGFI or CLGFI.
1274 return isInt<32>(Imm) || isUInt<32>(Imm);
1275}
1276
1278 // We can use ALGFI or SLGFI.
1279 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1280}
1281
1283 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1284 // Unaligned accesses should never be slower than the expanded version.
1285 // We check specifically for aligned accesses in the few cases where
1286 // they are required.
1287 if (Fast)
1288 *Fast = 1;
1289 return true;
1290}
1291
1293 EVT VT = Y.getValueType();
1294
1295 // We can use NC(G)RK for types in GPRs ...
1296 if (VT == MVT::i32 || VT == MVT::i64)
1297 return Subtarget.hasMiscellaneousExtensions3();
1298
1299 // ... or VNC for types in VRs.
1300 if (VT.isVector() || VT == MVT::i128)
1301 return Subtarget.hasVector();
1302
1303 return false;
1304}
1305
1306// Information about the addressing mode for a memory access.
1308 // True if a long displacement is supported.
1310
1311 // True if use of index register is supported.
1313
1314 AddressingMode(bool LongDispl, bool IdxReg) :
1315 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1316};
1317
 1318 // Return the desired addressing mode for a Load whose only use (in the
 1319 // same block) is a Store.
1321 Type *Ty) {
1322 // With vector support a Load->Store combination may be combined to either
1323 // an MVC or vector operations and it seems to work best to allow the
1324 // vector addressing mode.
1325 if (HasVector)
1326 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1327
1328 // Otherwise only the MVC case is special.
1329 bool MVC = Ty->isIntegerTy(8);
1330 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1331}
1332
1333// Return the addressing mode which seems most desirable given an LLVM
1334// Instruction pointer.
1335static AddressingMode
1337 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1338 switch (II->getIntrinsicID()) {
1339 default: break;
1340 case Intrinsic::memset:
1341 case Intrinsic::memmove:
1342 case Intrinsic::memcpy:
1343 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1344 }
1345 }
1346
1347 if (isa<LoadInst>(I) && I->hasOneUse()) {
1348 auto *SingleUser = cast<Instruction>(*I->user_begin());
1349 if (SingleUser->getParent() == I->getParent()) {
1350 if (isa<ICmpInst>(SingleUser)) {
1351 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1352 if (C->getBitWidth() <= 64 &&
1353 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1354 // Comparison of memory with 16 bit signed / unsigned immediate
1355 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1356 } else if (isa<StoreInst>(SingleUser))
1357 // Load->Store
1358 return getLoadStoreAddrMode(HasVector, I->getType());
1359 }
1360 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1361 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1362 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1363 // Load->Store
1364 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1365 }
1366
1367 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1368
1369 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1370 // dependencies (LDE only supports small offsets).
1371 // * Utilize the vector registers to hold floating point
1372 // values (vector load / store instructions only support small
1373 // offsets).
1374
1375 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1376 I->getOperand(0)->getType());
1377 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1378 bool IsVectorAccess = MemAccessTy->isVectorTy();
1379
1380 // A store of an extracted vector element will be combined into a VSTE type
1381 // instruction.
1382 if (!IsVectorAccess && isa<StoreInst>(I)) {
1383 Value *DataOp = I->getOperand(0);
1384 if (isa<ExtractElementInst>(DataOp))
1385 IsVectorAccess = true;
1386 }
1387
1388 // A load which gets inserted into a vector element will be combined into a
1389 // VLE type instruction.
1390 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1391 User *LoadUser = *I->user_begin();
1392 if (isa<InsertElementInst>(LoadUser))
1393 IsVectorAccess = true;
1394 }
1395
1396 if (IsFPAccess || IsVectorAccess)
1397 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1398 }
1399
1400 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1401}
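// Editor's worked example, not in the original source: for a plain i8 copy
// such as "*dst = *src;" without the vector facility, the load's single
// user is a store, so getLoadStoreAddrMode() treats it as an MVC candidate
// and returns AddressingMode(false, false) -- a short 12-bit displacement
// and no index register, which is all the MVC format can encode. With the
// vector facility the same pattern yields AddressingMode(false, true),
// keeping the index register available.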
1402
1404 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1405 // Punt on globals for now, although they can be used in limited
1406 // RELATIVE LONG cases.
1407 if (AM.BaseGV)
1408 return false;
1409
1410 // Require a 20-bit signed offset.
1411 if (!isInt<20>(AM.BaseOffs))
1412 return false;
1413
1414 bool RequireD12 =
1415 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1416 AddressingMode SupportedAM(!RequireD12, true);
1417 if (I != nullptr)
1418 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1419
1420 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1421 return false;
1422
1423 if (!SupportedAM.IndexReg)
1424 // No indexing allowed.
1425 return AM.Scale == 0;
1426 else
1427 // Indexing is OK but no scale factor can be applied.
1428 return AM.Scale == 0 || AM.Scale == 1;
1429}
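// Editor's worked example, not in the original source: BaseOffs == 600000
// is rejected outright because it fails the signed 20-bit displacement
// check above. BaseOffs == 5000 with Scale == 1 and no global base is
// accepted for a scalar i64 access (long displacement, index allowed), but
// the same offset is rejected for a vector or i128 access on a
// vector-capable subtarget, where RequireD12 limits the displacement to an
// unsigned 12-bit value (0-4095).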
1430
1432 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1433 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1434 const AttributeList &FuncAttributes) const {
1435 const int MVCFastLen = 16;
1436
1437 if (Limit != ~unsigned(0)) {
1438 // Don't expand Op into scalar loads/stores in these cases:
1439 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1440 return false; // Small memcpy: Use MVC
1441 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1442 return false; // Small memset (first byte with STC/MVI): Use MVC
1443 if (Op.isZeroMemset())
1444 return false; // Memset zero: Use XC
1445 }
1446
1448 DstAS, SrcAS, FuncAttributes);
1449}
1450
1452 LLVMContext &Context, const MemOp &Op,
1453 const AttributeList &FuncAttributes) const {
1454 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1455}
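// Editor's worked example, not in the original source: a 16-byte memcpy
// with overlap allowed, or a memset of up to 17 bytes, falls within
// MVCFastLen, so the first hook above returns false and the target emits
// MVC directly rather than scalar loads and stores; a zeroing memset is
// likewise left for XC. When scalar expansion does go ahead on a
// vector-capable subtarget, the hook immediately above reports MVT::v2i64,
// steering the expansion toward 16-byte vector copies.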
1456
1457bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1458 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1459 return false;
1460 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1461 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1462 return FromBits > ToBits;
1463}
1464
1466 if (!FromVT.isInteger() || !ToVT.isInteger())
1467 return false;
1468 unsigned FromBits = FromVT.getFixedSizeInBits();
1469 unsigned ToBits = ToVT.getFixedSizeInBits();
1470 return FromBits > ToBits;
1471}
1472
1473//===----------------------------------------------------------------------===//
1474// Inline asm support
1475//===----------------------------------------------------------------------===//
1476
1479 if (Constraint.size() == 1) {
1480 switch (Constraint[0]) {
1481 case 'a': // Address register
1482 case 'd': // Data register (equivalent to 'r')
1483 case 'f': // Floating-point register
1484 case 'h': // High-part register
1485 case 'r': // General-purpose register
1486 case 'v': // Vector register
1487 return C_RegisterClass;
1488
1489 case 'Q': // Memory with base and unsigned 12-bit displacement
1490 case 'R': // Likewise, plus an index
1491 case 'S': // Memory with base and signed 20-bit displacement
1492 case 'T': // Likewise, plus an index
1493 case 'm': // Equivalent to 'T'.
1494 return C_Memory;
1495
1496 case 'I': // Unsigned 8-bit constant
1497 case 'J': // Unsigned 12-bit constant
1498 case 'K': // Signed 16-bit constant
1499 case 'L': // Signed 20-bit displacement (on all targets we support)
1500 case 'M': // 0x7fffffff
1501 return C_Immediate;
1502
1503 default:
1504 break;
1505 }
1506 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1507 switch (Constraint[1]) {
1508 case 'Q': // Address with base and unsigned 12-bit displacement
1509 case 'R': // Likewise, plus an index
1510 case 'S': // Address with base and signed 20-bit displacement
1511 case 'T': // Likewise, plus an index
1512 return C_Address;
1513
1514 default:
1515 break;
1516 }
1517 }
1518 return TargetLowering::getConstraintType(Constraint);
1519}
1520
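// Editor's sketch, not in the original source: how these constraint letters
// appear in user-level GCC-style inline assembly. The function and the
// instruction choice are illustrative assumptions only.
//
//   long add100(long x) {
//     // 'd' = general-purpose (data) register, 'K' = signed 16-bit constant.
//     asm("aghi %0,%1" : "+d"(x) : "K"(100));
//     return x;
//   }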
1523 AsmOperandInfo &Info, const char *Constraint) const {
1525 Value *CallOperandVal = Info.CallOperandVal;
1526 // If we don't have a value, we can't do a match,
1527 // but allow it at the lowest weight.
1528 if (!CallOperandVal)
1529 return CW_Default;
1530 Type *type = CallOperandVal->getType();
1531 // Look at the constraint type.
1532 switch (*Constraint) {
1533 default:
1535 break;
1536
1537 case 'a': // Address register
1538 case 'd': // Data register (equivalent to 'r')
1539 case 'h': // High-part register
1540 case 'r': // General-purpose register
1541 Weight =
1542 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1543 break;
1544
1545 case 'f': // Floating-point register
1546 if (!useSoftFloat())
1547 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1548 break;
1549
1550 case 'v': // Vector register
1551 if (Subtarget.hasVector())
1552 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1553 : CW_Default;
1554 break;
1555
1556 case 'I': // Unsigned 8-bit constant
1557 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1558 if (isUInt<8>(C->getZExtValue()))
1559 Weight = CW_Constant;
1560 break;
1561
1562 case 'J': // Unsigned 12-bit constant
1563 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1564 if (isUInt<12>(C->getZExtValue()))
1565 Weight = CW_Constant;
1566 break;
1567
1568 case 'K': // Signed 16-bit constant
1569 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1570 if (isInt<16>(C->getSExtValue()))
1571 Weight = CW_Constant;
1572 break;
1573
1574 case 'L': // Signed 20-bit displacement (on all targets we support)
1575 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1576 if (isInt<20>(C->getSExtValue()))
1577 Weight = CW_Constant;
1578 break;
1579
1580 case 'M': // 0x7fffffff
1581 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1582 if (C->getZExtValue() == 0x7fffffff)
1583 Weight = CW_Constant;
1584 break;
1585 }
1586 return Weight;
1587}
1588
1589// Parse a "{tNNN}" register constraint for which the register type "t"
 1590 // has already been verified. RC is the class associated with "t" and
1591// Map maps 0-based register numbers to LLVM register numbers.
1592static std::pair<unsigned, const TargetRegisterClass *>
1594 const unsigned *Map, unsigned Size) {
1595 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1596 if (isdigit(Constraint[2])) {
1597 unsigned Index;
1598 bool Failed =
1599 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1600 if (!Failed && Index < Size && Map[Index])
1601 return std::make_pair(Map[Index], RC);
1602 }
1603 return std::make_pair(0U, nullptr);
1604}
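// Editor's worked example, not in the original source: for the constraint
// "{r5}" with a 64-bit operand, the caller below passes the GR64 register
// class and its 0-based number-to-register map, so Index parses as 5 and
// the function returns that map's entry for r5 paired with
// SystemZ::GR64BitRegClass. A register number that is out of range, or
// whose map slot is 0, falls through to the (0U, nullptr) failure pair.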
1605
1606std::pair<unsigned, const TargetRegisterClass *>
1608 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1609 if (Constraint.size() == 1) {
1610 // GCC Constraint Letters
1611 switch (Constraint[0]) {
1612 default: break;
1613 case 'd': // Data register (equivalent to 'r')
1614 case 'r': // General-purpose register
1615 if (VT.getSizeInBits() == 64)
1616 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1617 else if (VT.getSizeInBits() == 128)
1618 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1619 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1620
1621 case 'a': // Address register
1622 if (VT == MVT::i64)
1623 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1624 else if (VT == MVT::i128)
1625 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1626 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1627
1628 case 'h': // High-part register (an LLVM extension)
1629 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1630
1631 case 'f': // Floating-point register
1632 if (!useSoftFloat()) {
1633 if (VT.getSizeInBits() == 16)
1634 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1635 else if (VT.getSizeInBits() == 64)
1636 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1637 else if (VT.getSizeInBits() == 128)
1638 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1639 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1640 }
1641 break;
1642
1643 case 'v': // Vector register
1644 if (Subtarget.hasVector()) {
1645 if (VT.getSizeInBits() == 16)
1646 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1647 if (VT.getSizeInBits() == 32)
1648 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1649 if (VT.getSizeInBits() == 64)
1650 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1651 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1652 }
1653 break;
1654 }
1655 }
1656 if (Constraint.starts_with("{")) {
1657
1658 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1659 // to check the size on.
1660 auto getVTSizeInBits = [&VT]() {
1661 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1662 };
1663
1664 // We need to override the default register parsing for GPRs and FPRs
1665 // because the interpretation depends on VT. The internal names of
1666 // the registers are also different from the external names
1667 // (F0D and F0S instead of F0, etc.).
1668 if (Constraint[1] == 'r') {
1669 if (getVTSizeInBits() == 32)
1670 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1672 if (getVTSizeInBits() == 128)
1673 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1675 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1677 }
1678 if (Constraint[1] == 'f') {
1679 if (useSoftFloat())
1680 return std::make_pair(
1681 0u, static_cast<const TargetRegisterClass *>(nullptr));
1682 if (getVTSizeInBits() == 16)
1683 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1685 if (getVTSizeInBits() == 32)
1686 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1688 if (getVTSizeInBits() == 128)
1689 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1691 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1693 }
1694 if (Constraint[1] == 'v') {
1695 if (!Subtarget.hasVector())
1696 return std::make_pair(
1697 0u, static_cast<const TargetRegisterClass *>(nullptr));
1698 if (getVTSizeInBits() == 16)
1699 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1701 if (getVTSizeInBits() == 32)
1702 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1704 if (getVTSizeInBits() == 64)
1705 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1707 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1709 }
1710 }
1711 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1712}
1713
1714// FIXME? Maybe this could be a TableGen attribute on some registers and
1715// this table could be generated automatically from RegInfo.
1718 const MachineFunction &MF) const {
1719 Register Reg =
1721 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1722 : SystemZ::NoRegister)
1723 .Case("r15",
1724 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1725 .Default(Register());
1726
1727 return Reg;
1728}
1729
1731 const Constant *PersonalityFn) const {
1732 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1733}
1734
1736 const Constant *PersonalityFn) const {
1737 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1738}
1739
1741 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1742 SelectionDAG &DAG) const {
1743 // Only support length 1 constraints for now.
1744 if (Constraint.size() == 1) {
1745 switch (Constraint[0]) {
1746 case 'I': // Unsigned 8-bit constant
1747 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1748 if (isUInt<8>(C->getZExtValue()))
1749 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1750 Op.getValueType()));
1751 return;
1752
1753 case 'J': // Unsigned 12-bit constant
1754 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1755 if (isUInt<12>(C->getZExtValue()))
1756 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1757 Op.getValueType()));
1758 return;
1759
1760 case 'K': // Signed 16-bit constant
1761 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1762 if (isInt<16>(C->getSExtValue()))
1763 Ops.push_back(DAG.getSignedTargetConstant(
1764 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1765 return;
1766
1767 case 'L': // Signed 20-bit displacement (on all targets we support)
1768 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1769 if (isInt<20>(C->getSExtValue()))
1770 Ops.push_back(DAG.getSignedTargetConstant(
1771 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1772 return;
1773
1774 case 'M': // 0x7fffffff
1775 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1776 if (C->getZExtValue() == 0x7fffffff)
1777 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1778 Op.getValueType()));
1779 return;
1780 }
1781 }
1782 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1783}
1784
1785//===----------------------------------------------------------------------===//
1786// Calling conventions
1787//===----------------------------------------------------------------------===//
1788
1789#include "SystemZGenCallingConv.inc"
1790
1792 CallingConv::ID) const {
1793 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1794 SystemZ::R14D, 0 };
1795 return ScratchRegs;
1796}
1797
1799 Type *ToType) const {
1800 return isTruncateFree(FromType, ToType);
1801}
1802
1804 return CI->isTailCall();
1805}
1806
1807// Value is a value that has been passed to us in the location described by VA
1808// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1809// any loads onto Chain.
1811 CCValAssign &VA, SDValue Chain,
1812 SDValue Value) {
1813 // If the argument has been promoted from a smaller type, insert an
1814 // assertion to capture this.
1815 if (VA.getLocInfo() == CCValAssign::SExt)
1817 DAG.getValueType(VA.getValVT()));
1818 else if (VA.getLocInfo() == CCValAssign::ZExt)
1820 DAG.getValueType(VA.getValVT()));
1821
1822 if (VA.isExtInLoc())
1823 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1824 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1825 // If this is a short vector argument loaded from the stack,
1826 // extend from i64 to full vector size and then bitcast.
1827 assert(VA.getLocVT() == MVT::i64);
1828 assert(VA.getValVT().isVector());
1829 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1830 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1831 } else
1832 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1833 return Value;
1834}
1835
1836// Value is a value of type VA.getValVT() that we need to copy into
1837// the location described by VA. Return a copy of Value converted to
 1838 // VA.getLocVT(). The caller is responsible for handling indirect values.
1840 CCValAssign &VA, SDValue Value) {
1841 switch (VA.getLocInfo()) {
1842 case CCValAssign::SExt:
1843 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1844 case CCValAssign::ZExt:
1845 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1846 case CCValAssign::AExt:
1847 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1848 case CCValAssign::BCvt: {
1849 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1850 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1851 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1852 // For an f32 vararg we need to first promote it to an f64 and then
1853 // bitcast it to an i64.
1854 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1855 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1856 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1857 ? MVT::v2i64
1858 : VA.getLocVT();
1859 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1860 // For ELF, this is a short vector argument to be stored to the stack,
1861 // bitcast to v2i64 and then extract first element.
1862 if (BitCastToType == MVT::v2i64)
1863 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1864 DAG.getConstant(0, DL, MVT::i32));
1865 return Value;
1866 }
1867 case CCValAssign::Full:
1868 return Value;
1869 default:
1870 llvm_unreachable("Unhandled getLocInfo()");
1871 }
1872}
1873
1875 SDLoc DL(In);
1876 SDValue Lo, Hi;
1877 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1878 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1879 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1880 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1881 DAG.getConstant(64, DL, MVT::i32)));
1882 } else {
1883 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1884 }
1885
1886 // FIXME: If v2i64 were a legal type, we could use it instead of
1887 // Untyped here. This might enable improved folding.
1888 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1889 MVT::Untyped, Hi, Lo);
1890 return SDValue(Pair, 0);
1891}
1892
1894 SDLoc DL(In);
1895 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1896 DL, MVT::i64, In);
1897 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1898 DL, MVT::i64, In);
1899
1900 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1901 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1902 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1903 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1904 DAG.getConstant(64, DL, MVT::i32));
1905 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1906 } else {
1907 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1908 }
1909}
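// Editor's worked example, not in the original source: for the i128 value
// 0x00000000000000AB'FFFFFFFFFFFFFFFF, the first helper above computes
// Lo == 0xFFFFFFFFFFFFFFFF and Hi == 0xAB and packs them into an untyped
// PAIR128 node (Hi operand first); the second helper reverses the process,
// extracting subreg_h64 / subreg_l64 and recombining them as
// (Hi << 64) | Lo, or via BUILD_PAIR when i128 is not a legal type.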
1910
1912 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1913 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1914 EVT ValueVT = Val.getValueType();
1915 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1916 // Inline assembly operand.
1917 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1918 return true;
1919 }
1920
1921 return false;
1922}
1923
1925 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1926 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1927 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1928 // Inline assembly operand.
1929 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1930 return DAG.getBitcast(ValueVT, Res);
1931 }
1932
1933 return SDValue();
1934}
1935
1937 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1938 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1939 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1941 MachineFrameInfo &MFI = MF.getFrameInfo();
1943 SystemZMachineFunctionInfo *FuncInfo =
1945 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1946 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1947
1948 // Assign locations to all of the incoming arguments.
1950 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1951 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1952 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1953
1954 unsigned NumFixedGPRs = 0;
1955 unsigned NumFixedFPRs = 0;
1956 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1957 SDValue ArgValue;
1958 CCValAssign &VA = ArgLocs[I];
1959 EVT LocVT = VA.getLocVT();
1960 if (VA.isRegLoc()) {
1961 // Arguments passed in registers
1962 const TargetRegisterClass *RC;
1963 switch (LocVT.getSimpleVT().SimpleTy) {
1964 default:
1965 // Integers smaller than i64 should be promoted to i64.
1966 llvm_unreachable("Unexpected argument type");
1967 case MVT::i32:
1968 NumFixedGPRs += 1;
1969 RC = &SystemZ::GR32BitRegClass;
1970 break;
1971 case MVT::i64:
1972 NumFixedGPRs += 1;
1973 RC = &SystemZ::GR64BitRegClass;
1974 break;
1975 case MVT::f16:
1976 NumFixedFPRs += 1;
1977 RC = &SystemZ::FP16BitRegClass;
1978 break;
1979 case MVT::f32:
1980 NumFixedFPRs += 1;
1981 RC = &SystemZ::FP32BitRegClass;
1982 break;
1983 case MVT::f64:
1984 NumFixedFPRs += 1;
1985 RC = &SystemZ::FP64BitRegClass;
1986 break;
1987 case MVT::f128:
1988 NumFixedFPRs += 2;
1989 RC = &SystemZ::FP128BitRegClass;
1990 break;
1991 case MVT::v16i8:
1992 case MVT::v8i16:
1993 case MVT::v4i32:
1994 case MVT::v2i64:
1995 case MVT::v4f32:
1996 case MVT::v2f64:
1997 RC = &SystemZ::VR128BitRegClass;
1998 break;
1999 }
2000
2001 Register VReg = MRI.createVirtualRegister(RC);
2002 MRI.addLiveIn(VA.getLocReg(), VReg);
2003 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2004 } else {
2005 assert(VA.isMemLoc() && "Argument not register or memory");
2006
2007 // Create the frame index object for this incoming parameter.
2008 // FIXME: Pre-include call frame size in the offset, should not
2009 // need to manually add it here.
2010 int64_t ArgSPOffset = VA.getLocMemOffset();
2011 if (Subtarget.isTargetXPLINK64()) {
2012 auto &XPRegs =
2014 ArgSPOffset += XPRegs.getCallFrameSize();
2015 }
2016 int FI =
2017 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2018
2019 // Create the SelectionDAG nodes corresponding to a load
2020 // from this parameter. Unpromoted ints and floats are
2021 // passed as right-justified 8-byte values.
2022 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2023 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2024 VA.getLocVT() == MVT::f16) {
2025 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2026 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2027 DAG.getIntPtrConstant(SlotOffs, DL));
2028 }
2029 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2031 }
2032
2033 // Convert the value of the argument register into the value that's
2034 // being passed.
2035 if (VA.getLocInfo() == CCValAssign::Indirect) {
2036 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2038 // If the original argument was split (e.g. i128), we need
2039 // to load all parts of it here (using the same address).
2040 unsigned ArgIndex = Ins[I].OrigArgIndex;
2041 assert (Ins[I].PartOffset == 0);
2042 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
2043 CCValAssign &PartVA = ArgLocs[I + 1];
2044 unsigned PartOffset = Ins[I + 1].PartOffset;
2045 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2046 DAG.getIntPtrConstant(PartOffset, DL));
2047 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2049 ++I;
2050 }
2051 } else
2052 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2053 }
2054
2055 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2056 // Save the number of non-varargs registers for later use by va_start, etc.
2057 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2058 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2059
2060 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2061 Subtarget.getSpecialRegisters());
2062
2063 // Likewise the address (in the form of a frame index) of where the
2064 // first stack vararg would be. The 1-byte size here is arbitrary.
2065 // FIXME: Pre-include call frame size in the offset, should not
2066 // need to manually add it here.
2067 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2068 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2069 FuncInfo->setVarArgsFrameIndex(FI);
2070 }
2071
2072 if (IsVarArg && Subtarget.isTargetELF()) {
2073 // Save the number of non-varargs registers for later use by va_start, etc.
2074 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2075 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2076
2077 // Likewise the address (in the form of a frame index) of where the
2078 // first stack vararg would be. The 1-byte size here is arbitrary.
2079 int64_t VarArgsOffset = CCInfo.getStackSize();
2080 FuncInfo->setVarArgsFrameIndex(
2081 MFI.CreateFixedObject(1, VarArgsOffset, true));
2082
2083 // ...and a similar frame index for the caller-allocated save area
2084 // that will be used to store the incoming registers.
2085 int64_t RegSaveOffset =
2086 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2087 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2088 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2089
2090 // Store the FPR varargs in the reserved frame slots. (We store the
2091 // GPRs as part of the prologue.)
2092 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2094 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2095 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2096 int FI =
2098 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2100 &SystemZ::FP64BitRegClass);
2101 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2102 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2104 }
2105 // Join the stores, which are independent of one another.
2106 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2107 ArrayRef(&MemOps[NumFixedFPRs],
2108 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2109 }
2110 }
2111
2112 if (Subtarget.isTargetXPLINK64()) {
2113 // Create a virtual register for the incoming "ADA" special register (R5).
2114 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2115 Register ADAvReg = MRI.createVirtualRegister(RC);
2116 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2117 Subtarget.getSpecialRegisters());
2118 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2119 FuncInfo->setADAVirtualRegister(ADAvReg);
2120 }
2121 return Chain;
2122}
2123
2124static bool canUseSiblingCall(const CCState &ArgCCInfo,
2127 // Punt if there are any indirect or stack arguments, or if the call
2128 // needs the callee-saved argument register R6, or if the call uses
2129 // the callee-saved register arguments SwiftSelf and SwiftError.
2130 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2131 CCValAssign &VA = ArgLocs[I];
2133 return false;
2134 if (!VA.isRegLoc())
2135 return false;
2136 Register Reg = VA.getLocReg();
2137 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2138 return false;
2139 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2140 return false;
2141 }
2142 return true;
2143}
2144
2146 unsigned Offset, bool LoadAdr = false) {
2149 Register ADAvReg = MFI->getADAVirtualRegister();
2151
2152 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2153 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2154
2155 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2156 if (!LoadAdr)
2157 Result = DAG.getLoad(
2158 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2160
2161 return Result;
2162}
2163
2164 // ADA access using a global value.
2165 // Note: for functions, the address of the descriptor is returned.
2167 EVT PtrVT) {
2168 unsigned ADAtype;
2169 bool LoadAddr = false;
2170 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2171 bool IsFunction =
2172 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2173 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2174
2175 if (IsFunction) {
2176 if (IsInternal) {
2178 LoadAddr = true;
2179 } else
2181 } else {
2183 }
2184 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2185
2186 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2187}
2188
2189static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2190 SDLoc &DL, SDValue &Chain) {
2191 unsigned ADADelta = 0; // ADA offset in desc.
2192 unsigned EPADelta = 8; // EPA offset in desc.
2195
2196 // XPLINK calling convention.
2197 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2198 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2199 G->getGlobal()->hasPrivateLinkage());
2200 if (IsInternal) {
2203 Register ADAvReg = MFI->getADAVirtualRegister();
2204 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2205 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2206 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2207 return true;
2208 } else {
2210 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2211 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2212 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2213 }
2214 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2216 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2217 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2218 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2219 } else {
2220 // Function pointer case
2221 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2222 DAG.getConstant(ADADelta, DL, PtrVT));
2223 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2225 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2226 DAG.getConstant(EPADelta, DL, PtrVT));
2227 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2229 }
2230 return false;
2231}
2232
2233SDValue
2235 SmallVectorImpl<SDValue> &InVals) const {
2236 SelectionDAG &DAG = CLI.DAG;
2237 SDLoc &DL = CLI.DL;
2239 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2241 SDValue Chain = CLI.Chain;
2242 SDValue Callee = CLI.Callee;
2243 bool &IsTailCall = CLI.IsTailCall;
2244 CallingConv::ID CallConv = CLI.CallConv;
2245 bool IsVarArg = CLI.IsVarArg;
2247 EVT PtrVT = getPointerTy(MF.getDataLayout());
2248 LLVMContext &Ctx = *DAG.getContext();
2250
2251 // FIXME: z/OS support to be added later.
2252 if (Subtarget.isTargetXPLINK64())
2253 IsTailCall = false;
2254
2255 // Integer args <=32 bits should have an extension attribute.
2256 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2257
2258 // Analyze the operands of the call, assigning locations to each operand.
2260 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2261 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2262
2263 // We don't support GuaranteedTailCallOpt, only automatically-detected
2264 // sibling calls.
2265 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2266 IsTailCall = false;
2267
2268 // Get a count of how many bytes are to be pushed on the stack.
2269 unsigned NumBytes = ArgCCInfo.getStackSize();
2270
2271 // Mark the start of the call.
2272 if (!IsTailCall)
2273 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2274
2275 // Copy argument values to their designated locations.
2277 SmallVector<SDValue, 8> MemOpChains;
2278 SDValue StackPtr;
2279 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2280 CCValAssign &VA = ArgLocs[I];
2281 SDValue ArgValue = OutVals[I];
2282
2283 if (VA.getLocInfo() == CCValAssign::Indirect) {
2284 // Store the argument in a stack slot and pass its address.
2285 unsigned ArgIndex = Outs[I].OrigArgIndex;
2286 EVT SlotVT;
2287 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2288 // Allocate the full stack space for a promoted (and split) argument.
2289 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
2290 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
2291 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2292 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2293 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
2294 } else {
2295 SlotVT = Outs[I].VT;
2296 }
2297 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2298 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2299 MemOpChains.push_back(
2300 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2302 // If the original argument was split (e.g. i128), we need
2303 // to store all parts of it here (and pass just one address).
2304 assert (Outs[I].PartOffset == 0);
2305 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2306 SDValue PartValue = OutVals[I + 1];
2307 unsigned PartOffset = Outs[I + 1].PartOffset;
2308 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2309 DAG.getIntPtrConstant(PartOffset, DL));
2310 MemOpChains.push_back(
2311 DAG.getStore(Chain, DL, PartValue, Address,
2313 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2314 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2315 ++I;
2316 }
2317 ArgValue = SpillSlot;
2318 } else
2319 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2320
2321 if (VA.isRegLoc()) {
2322 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2323 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2324 // and low values.
2325 if (VA.getLocVT() == MVT::i128)
2326 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2327 // Queue up the argument copies and emit them at the end.
2328 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2329 } else {
2330 assert(VA.isMemLoc() && "Argument not register or memory");
2331
2332 // Work out the address of the stack slot. Unpromoted ints and
2333 // floats are passed as right-justified 8-byte values.
2334 if (!StackPtr.getNode())
2335 StackPtr = DAG.getCopyFromReg(Chain, DL,
2336 Regs->getStackPointerRegister(), PtrVT);
2337 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2338 VA.getLocMemOffset();
2339 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2340 Offset += 4;
2341 else if (VA.getLocVT() == MVT::f16)
2342 Offset += 6;
2343 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2345
2346 // Emit the store.
2347 MemOpChains.push_back(
2348 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2349
2350 // Although long doubles or vectors are passed through the stack when
2351 // they are vararg (non-fixed arguments), if a long double or vector
2352 // occupies the third and fourth slots of the argument list, GPR3 should
2353 // still shadow the third slot of the argument list.
2354 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2355 SDValue ShadowArgValue =
2356 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2357 DAG.getIntPtrConstant(1, DL));
2358 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2359 }
2360 }
2361 }
2362
2363 // Join the stores, which are independent of one another.
2364 if (!MemOpChains.empty())
2365 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2366
2367 // Accept direct calls by converting symbolic call addresses to the
2368 // associated Target* opcodes. Force %r1 to be used for indirect
2369 // tail calls.
2370 SDValue Glue;
2371
2372 if (Subtarget.isTargetXPLINK64()) {
2373 SDValue ADA;
2374 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2375 if (!IsBRASL) {
2376 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2377 ->getAddressOfCalleeRegister();
2378 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2379 Glue = Chain.getValue(1);
2380 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2381 }
2382 RegsToPass.push_back(std::make_pair(
2383 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2384 } else {
2385 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2386 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2387 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2388 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2389 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2390 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2391 } else if (IsTailCall) {
2392 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2393 Glue = Chain.getValue(1);
2394 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2395 }
2396 }
2397
2398 // Build a sequence of copy-to-reg nodes, chained and glued together.
2399 for (const auto &[Reg, N] : RegsToPass) {
2400 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2401 Glue = Chain.getValue(1);
2402 }
2403
2404 // The first call operand is the chain and the second is the target address.
2406 Ops.push_back(Chain);
2407 Ops.push_back(Callee);
2408
2409 // Add argument registers to the end of the list so that they are
2410 // known live into the call.
2411 for (const auto &[Reg, N] : RegsToPass)
2412 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2413
2414 // Add a register mask operand representing the call-preserved registers.
2415 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2416 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2417 assert(Mask && "Missing call preserved mask for calling convention");
2418 Ops.push_back(DAG.getRegisterMask(Mask));
2419
2420 // Glue the call to the argument copies, if any.
2421 if (Glue.getNode())
2422 Ops.push_back(Glue);
2423
2424 // Emit the call.
2425 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2426 if (IsTailCall) {
2427 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2428 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2429 return Ret;
2430 }
2431 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2432 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2433 Glue = Chain.getValue(1);
2434
2435 // Mark the end of the call, which is glued to the call itself.
2436 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2437 Glue = Chain.getValue(1);
2438
2439 // Assign locations to each value returned by this call.
2441 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2442 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2443
2444 // Copy all of the result registers out of their specified physreg.
2445 for (CCValAssign &VA : RetLocs) {
2446 // Copy the value out, gluing the copy to the end of the call sequence.
2447 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2448 VA.getLocVT(), Glue);
2449 Chain = RetValue.getValue(1);
2450 Glue = RetValue.getValue(2);
2451
2452 // Convert the value of the return register into the value that's
2453 // being returned.
2454 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2455 }
2456
2457 return Chain;
2458}
2459
2460// Generate a call taking the given operands as arguments and returning a
2461// result of type RetVT.
2463 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2464 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2465 bool DoesNotReturn, bool IsReturnValueUsed) const {
2467 Args.reserve(Ops.size());
2468
2469 for (SDValue Op : Ops) {
2471 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2472 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2473 Entry.IsZExt = !Entry.IsSExt;
2474 Args.push_back(Entry);
2475 }
2476
2477 SDValue Callee =
2478 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2479
2480 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2482 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2483 CLI.setDebugLoc(DL)
2484 .setChain(Chain)
2485 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2486 .setNoReturn(DoesNotReturn)
2487 .setDiscardResult(!IsReturnValueUsed)
2488 .setSExtResult(SignExtend)
2489 .setZExtResult(!SignExtend);
2490 return LowerCallTo(CLI);
2491}
2492
2494 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2495 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2496 const Type *RetTy) const {
2497 // Special case that we cannot easily detect in RetCC_SystemZ since
2498 // i128 may not be a legal type.
2499 for (auto &Out : Outs)
2500 if (Out.ArgVT == MVT::i128)
2501 return false;
2502
2504 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2505 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2506}
2507
2508SDValue
2510 bool IsVarArg,
2512 const SmallVectorImpl<SDValue> &OutVals,
2513 const SDLoc &DL, SelectionDAG &DAG) const {
2515
2516 // Integer args <=32 bits should have an extension attribute.
2517 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2518
2519 // Assign locations to each returned value.
2521 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2522 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2523
2524 // Quick exit for void returns
2525 if (RetLocs.empty())
2526 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2527
2528 if (CallConv == CallingConv::GHC)
2529 report_fatal_error("GHC functions return void only");
2530
2531 // Copy the result values into the output registers.
2532 SDValue Glue;
2534 RetOps.push_back(Chain);
2535 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2536 CCValAssign &VA = RetLocs[I];
2537 SDValue RetValue = OutVals[I];
2538
2539 // Make the return register live on exit.
2540 assert(VA.isRegLoc() && "Can only return in registers!");
2541
2542 // Promote the value as required.
2543 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2544
2545 // Chain and glue the copies together.
2546 Register Reg = VA.getLocReg();
2547 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2548 Glue = Chain.getValue(1);
2549 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2550 }
2551
2552 // Update chain and glue.
2553 RetOps[0] = Chain;
2554 if (Glue.getNode())
2555 RetOps.push_back(Glue);
2556
2557 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2558}
2559
2560// Return true if Op is an intrinsic node with chain that returns the CC value
2561// as its only (other) argument. Provide the associated SystemZISD opcode and
2562// the mask of valid CC values if so.
2563static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2564 unsigned &CCValid) {
2565 unsigned Id = Op.getConstantOperandVal(1);
2566 switch (Id) {
2567 case Intrinsic::s390_tbegin:
2568 Opcode = SystemZISD::TBEGIN;
2569 CCValid = SystemZ::CCMASK_TBEGIN;
2570 return true;
2571
2572 case Intrinsic::s390_tbegin_nofloat:
2574 CCValid = SystemZ::CCMASK_TBEGIN;
2575 return true;
2576
2577 case Intrinsic::s390_tend:
2578 Opcode = SystemZISD::TEND;
2579 CCValid = SystemZ::CCMASK_TEND;
2580 return true;
2581
2582 default:
2583 return false;
2584 }
2585}
2586
2587// Return true if Op is an intrinsic node without chain that returns the
2588// CC value as its final argument. Provide the associated SystemZISD
2589// opcode and the mask of valid CC values if so.
2590static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2591 unsigned Id = Op.getConstantOperandVal(0);
2592 switch (Id) {
2593 case Intrinsic::s390_vpkshs:
2594 case Intrinsic::s390_vpksfs:
2595 case Intrinsic::s390_vpksgs:
2596 Opcode = SystemZISD::PACKS_CC;
2597 CCValid = SystemZ::CCMASK_VCMP;
2598 return true;
2599
2600 case Intrinsic::s390_vpklshs:
2601 case Intrinsic::s390_vpklsfs:
2602 case Intrinsic::s390_vpklsgs:
2603 Opcode = SystemZISD::PACKLS_CC;
2604 CCValid = SystemZ::CCMASK_VCMP;
2605 return true;
2606
2607 case Intrinsic::s390_vceqbs:
2608 case Intrinsic::s390_vceqhs:
2609 case Intrinsic::s390_vceqfs:
2610 case Intrinsic::s390_vceqgs:
2611 case Intrinsic::s390_vceqqs:
2612 Opcode = SystemZISD::VICMPES;
2613 CCValid = SystemZ::CCMASK_VCMP;
2614 return true;
2615
2616 case Intrinsic::s390_vchbs:
2617 case Intrinsic::s390_vchhs:
2618 case Intrinsic::s390_vchfs:
2619 case Intrinsic::s390_vchgs:
2620 case Intrinsic::s390_vchqs:
2621 Opcode = SystemZISD::VICMPHS;
2622 CCValid = SystemZ::CCMASK_VCMP;
2623 return true;
2624
2625 case Intrinsic::s390_vchlbs:
2626 case Intrinsic::s390_vchlhs:
2627 case Intrinsic::s390_vchlfs:
2628 case Intrinsic::s390_vchlgs:
2629 case Intrinsic::s390_vchlqs:
2630 Opcode = SystemZISD::VICMPHLS;
2631 CCValid = SystemZ::CCMASK_VCMP;
2632 return true;
2633
2634 case Intrinsic::s390_vtm:
2635 Opcode = SystemZISD::VTM;
2636 CCValid = SystemZ::CCMASK_VCMP;
2637 return true;
2638
2639 case Intrinsic::s390_vfaebs:
2640 case Intrinsic::s390_vfaehs:
2641 case Intrinsic::s390_vfaefs:
2642 Opcode = SystemZISD::VFAE_CC;
2643 CCValid = SystemZ::CCMASK_ANY;
2644 return true;
2645
2646 case Intrinsic::s390_vfaezbs:
2647 case Intrinsic::s390_vfaezhs:
2648 case Intrinsic::s390_vfaezfs:
2649 Opcode = SystemZISD::VFAEZ_CC;
2650 CCValid = SystemZ::CCMASK_ANY;
2651 return true;
2652
2653 case Intrinsic::s390_vfeebs:
2654 case Intrinsic::s390_vfeehs:
2655 case Intrinsic::s390_vfeefs:
2656 Opcode = SystemZISD::VFEE_CC;
2657 CCValid = SystemZ::CCMASK_ANY;
2658 return true;
2659
2660 case Intrinsic::s390_vfeezbs:
2661 case Intrinsic::s390_vfeezhs:
2662 case Intrinsic::s390_vfeezfs:
2663 Opcode = SystemZISD::VFEEZ_CC;
2664 CCValid = SystemZ::CCMASK_ANY;
2665 return true;
2666
2667 case Intrinsic::s390_vfenebs:
2668 case Intrinsic::s390_vfenehs:
2669 case Intrinsic::s390_vfenefs:
2670 Opcode = SystemZISD::VFENE_CC;
2671 CCValid = SystemZ::CCMASK_ANY;
2672 return true;
2673
2674 case Intrinsic::s390_vfenezbs:
2675 case Intrinsic::s390_vfenezhs:
2676 case Intrinsic::s390_vfenezfs:
2677 Opcode = SystemZISD::VFENEZ_CC;
2678 CCValid = SystemZ::CCMASK_ANY;
2679 return true;
2680
2681 case Intrinsic::s390_vistrbs:
2682 case Intrinsic::s390_vistrhs:
2683 case Intrinsic::s390_vistrfs:
2684 Opcode = SystemZISD::VISTR_CC;
2686 return true;
2687
2688 case Intrinsic::s390_vstrcbs:
2689 case Intrinsic::s390_vstrchs:
2690 case Intrinsic::s390_vstrcfs:
2691 Opcode = SystemZISD::VSTRC_CC;
2692 CCValid = SystemZ::CCMASK_ANY;
2693 return true;
2694
2695 case Intrinsic::s390_vstrczbs:
2696 case Intrinsic::s390_vstrczhs:
2697 case Intrinsic::s390_vstrczfs:
2698 Opcode = SystemZISD::VSTRCZ_CC;
2699 CCValid = SystemZ::CCMASK_ANY;
2700 return true;
2701
2702 case Intrinsic::s390_vstrsb:
2703 case Intrinsic::s390_vstrsh:
2704 case Intrinsic::s390_vstrsf:
2705 Opcode = SystemZISD::VSTRS_CC;
2706 CCValid = SystemZ::CCMASK_ANY;
2707 return true;
2708
2709 case Intrinsic::s390_vstrszb:
2710 case Intrinsic::s390_vstrszh:
2711 case Intrinsic::s390_vstrszf:
2712 Opcode = SystemZISD::VSTRSZ_CC;
2713 CCValid = SystemZ::CCMASK_ANY;
2714 return true;
2715
2716 case Intrinsic::s390_vfcedbs:
2717 case Intrinsic::s390_vfcesbs:
2718 Opcode = SystemZISD::VFCMPES;
2719 CCValid = SystemZ::CCMASK_VCMP;
2720 return true;
2721
2722 case Intrinsic::s390_vfchdbs:
2723 case Intrinsic::s390_vfchsbs:
2724 Opcode = SystemZISD::VFCMPHS;
2725 CCValid = SystemZ::CCMASK_VCMP;
2726 return true;
2727
2728 case Intrinsic::s390_vfchedbs:
2729 case Intrinsic::s390_vfchesbs:
2730 Opcode = SystemZISD::VFCMPHES;
2731 CCValid = SystemZ::CCMASK_VCMP;
2732 return true;
2733
2734 case Intrinsic::s390_vftcidb:
2735 case Intrinsic::s390_vftcisb:
2736 Opcode = SystemZISD::VFTCI;
2737 CCValid = SystemZ::CCMASK_VCMP;
2738 return true;
2739
2740 case Intrinsic::s390_tdc:
2741 Opcode = SystemZISD::TDC;
2742 CCValid = SystemZ::CCMASK_TDC;
2743 return true;
2744
2745 default:
2746 return false;
2747 }
2748}
2749
2750// Emit an intrinsic with chain and an explicit CC register result.
2752 unsigned Opcode) {
2753 // Copy all operands except the intrinsic ID.
2754 unsigned NumOps = Op.getNumOperands();
2756 Ops.reserve(NumOps - 1);
2757 Ops.push_back(Op.getOperand(0));
2758 for (unsigned I = 2; I < NumOps; ++I)
2759 Ops.push_back(Op.getOperand(I));
2760
2761 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2762 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2763 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2764 SDValue OldChain = SDValue(Op.getNode(), 1);
2765 SDValue NewChain = SDValue(Intr.getNode(), 1);
2766 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2767 return Intr.getNode();
2768}
2769
2770// Emit an intrinsic with an explicit CC register result.
2772 unsigned Opcode) {
2773 // Copy all operands except the intrinsic ID.
2774 SDLoc DL(Op);
2775 unsigned NumOps = Op.getNumOperands();
2777 Ops.reserve(NumOps - 1);
2778 for (unsigned I = 1; I < NumOps; ++I) {
2779 SDValue CurrOper = Op.getOperand(I);
2780 if (CurrOper.getValueType() == MVT::f16) {
2781 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2782 "Unhandled intrinsic with f16 operand.");
2783 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2784 }
2785 Ops.push_back(CurrOper);
2786 }
2787
2788 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2789 return Intr.getNode();
2790}
2791
2792// CC is a comparison that will be implemented using an integer or
2793// floating-point comparison. Return the condition code mask for
2794// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2795// unsigned comparisons and clear for signed ones. In the floating-point
2796// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
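// For example, CONV(GT) expands to three cases: SETGT and SETOGT both map
// to CCMASK_CMP_GT, while SETUGT maps to CCMASK_CMP_UO | CCMASK_CMP_GT.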
2798#define CONV(X) \
2799 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2800 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2801 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2802
2803 switch (CC) {
2804 default:
2805 llvm_unreachable("Invalid integer condition!");
2806
2807 CONV(EQ);
2808 CONV(NE);
2809 CONV(GT);
2810 CONV(GE);
2811 CONV(LT);
2812 CONV(LE);
2813
2814 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2816 }
2817#undef CONV
2818}
2819
2820// If C can be converted to a comparison against zero, adjust the operands
2821// as necessary.
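// For example, "X > -1" becomes "X >= 0" and "X < 1" becomes "X <= 0";
// comparisons against zero have many more optimized forms later on.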
2822static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2823 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2824 return;
2825
2826 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2827 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2828 return;
2829
2830 int64_t Value = ConstOp1->getSExtValue();
2831 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2832 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2833 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2834 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2835 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2836 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2837 }
2838}
2839
2840// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2841// adjust the operands as necessary.
2842static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2843 Comparison &C) {
2844 // For us to make any changes, it must be a comparison between a single-use
2845 // load and a constant.
2846 if (!C.Op0.hasOneUse() ||
2847 C.Op0.getOpcode() != ISD::LOAD ||
2848 C.Op1.getOpcode() != ISD::Constant)
2849 return;
2850
2851 // We must have an 8- or 16-bit load.
2852 auto *Load = cast<LoadSDNode>(C.Op0);
2853 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2854 if ((NumBits != 8 && NumBits != 16) ||
2855 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2856 return;
2857
2858 // The load must be an extending one and the constant must be within the
2859 // range of the unextended value.
2860 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2861 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2862 return;
2863 uint64_t Value = ConstOp1->getZExtValue();
2864 uint64_t Mask = (1 << NumBits) - 1;
2865 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2866 // Make sure that ConstOp1 is in range of C.Op0.
2867 int64_t SignedValue = ConstOp1->getSExtValue();
2868 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2869 return;
2870 if (C.ICmpType != SystemZICMP::SignedOnly) {
2871 // Unsigned comparison between two sign-extended values is equivalent
2872 // to unsigned comparison between two zero-extended values.
2873 Value &= Mask;
2874 } else if (NumBits == 8) {
2875 // Try to treat the comparison as unsigned, so that we can use CLI.
2876 // Adjust CCMask and Value as necessary.
2877 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2878 // Test whether the high bit of the byte is set.
2879 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2880 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2881 // Test whether the high bit of the byte is clear.
2882 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2883 else
2884 // No instruction exists for this combination.
2885 return;
2886 C.ICmpType = SystemZICMP::UnsignedOnly;
2887 }
2888 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2889 if (Value > Mask)
2890 return;
2891 // If the constant is in range, we can use any comparison.
2892 C.ICmpType = SystemZICMP::Any;
2893 } else
2894 return;
2895
2896 // Make sure that the first operand is an i32 of the right extension type.
2897 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2900 if (C.Op0.getValueType() != MVT::i32 ||
2901 Load->getExtensionType() != ExtType) {
2902 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2903 Load->getBasePtr(), Load->getPointerInfo(),
2904 Load->getMemoryVT(), Load->getAlign(),
2905 Load->getMemOperand()->getFlags());
2906 // Update the chain uses.
2907 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2908 }
2909
2910 // Make sure that the second operand is an i32 with the right value.
2911 if (C.Op1.getValueType() != MVT::i32 ||
2912 Value != ConstOp1->getZExtValue())
2913 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2914}
2915
2916// Return true if Op is either an unextended load, or a load suitable
2917// for integer register-memory comparisons of type ICmpType.
2918static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2919 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2920 if (Load) {
2921 // There are no instructions to compare a register with a memory byte.
2922 if (Load->getMemoryVT() == MVT::i8)
2923 return false;
2924 // Otherwise decide on extension type.
2925 switch (Load->getExtensionType()) {
2926 case ISD::NON_EXTLOAD:
2927 return true;
2928 case ISD::SEXTLOAD:
2929 return ICmpType != SystemZICMP::UnsignedOnly;
2930 case ISD::ZEXTLOAD:
2931 return ICmpType != SystemZICMP::SignedOnly;
2932 default:
2933 break;
2934 }
2935 }
2936 return false;
2937}
2938
2939// Return true if it is better to swap the operands of C.
2940static bool shouldSwapCmpOperands(const Comparison &C) {
2941 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2942 if (C.Op0.getValueType() == MVT::i128)
2943 return false;
2944 if (C.Op0.getValueType() == MVT::f128)
2945 return false;
2946
2947 // Always keep a floating-point constant second, since comparisons with
2948 // zero can use LOAD TEST and comparisons with other constants make a
2949 // natural memory operand.
2950 if (isa<ConstantFPSDNode>(C.Op1))
2951 return false;
2952
2953 // Never swap comparisons with zero since there are many ways to optimize
2954 // those later.
2955 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2956 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2957 return false;
2958
2959 // Also keep natural memory operands second if the loaded value is
2960 // only used here. Several comparisons have memory forms.
2961 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2962 return false;
2963
2964 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2965 // In that case we generally prefer the memory to be second.
2966 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2967 // The only exceptions are when the second operand is a constant and
2968 // we can use things like CHHSI.
2969 if (!ConstOp1)
2970 return true;
2971 // The unsigned memory-immediate instructions can handle 16-bit
2972 // unsigned integers.
2973 if (C.ICmpType != SystemZICMP::SignedOnly &&
2974 isUInt<16>(ConstOp1->getZExtValue()))
2975 return false;
2976 // The signed memory-immediate instructions can handle 16-bit
2977 // signed integers.
2978 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2979 isInt<16>(ConstOp1->getSExtValue()))
2980 return false;
2981 return true;
2982 }
2983
2984 // Try to promote the use of CGFR and CLGFR.
2985 unsigned Opcode0 = C.Op0.getOpcode();
2986 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2987 return true;
2988 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2989 return true;
2990 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2991 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2992 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2993 return true;
2994
2995 return false;
2996}
2997
2998// Check whether C tests for equality between X and Y and whether X - Y
2999// or Y - X is also computed. In that case it's better to compare the
3000// result of the subtraction against zero.
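// For example, given
//   %diff = sub i64 %x, %y
//   %cmp  = icmp eq i64 %x, %y
// the equality test can be rewritten as "%diff == 0", letting the
// subtraction itself set CC and making the separate compare removable.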
3002 Comparison &C) {
3003 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3004 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3005 for (SDNode *N : C.Op0->users()) {
3006 if (N->getOpcode() == ISD::SUB &&
3007 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3008 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3009 // Disable the nsw and nuw flags: the backend needs to handle
3010 // overflow as well during comparison elimination.
3011 N->dropFlags(SDNodeFlags::NoWrap);
3012 C.Op0 = SDValue(N, 0);
3013 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3014 return;
3015 }
3016 }
3017 }
3018}
3019
3020// Check whether C compares a floating-point value with zero and if that
3021// floating-point value is also negated. In this case we can use the
3022 // negation to set CC, avoiding separate LOAD AND TEST and
3023// LOAD (NEGATIVE/COMPLEMENT) instructions.
3024static void adjustForFNeg(Comparison &C) {
3025 // This optimization is invalid for strict comparisons, since FNEG
3026 // does not raise any exceptions.
3027 if (C.Chain)
3028 return;
3029 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3030 if (C1 && C1->isZero()) {
3031 for (SDNode *N : C.Op0->users()) {
3032 if (N->getOpcode() == ISD::FNEG) {
3033 C.Op0 = SDValue(N, 0);
3034 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3035 return;
3036 }
3037 }
3038 }
3039}
3040
3041// Check whether C compares (shl X, 32) with 0 and whether X is
3042// also sign-extended. In that case it is better to test the result
3043// of the sign extension using LTGFR.
3044//
3045// This case is important because InstCombine transforms a comparison
3046// with (sext (trunc X)) into a comparison with (shl X, 32).
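// For example, if both
//   T = (shl X, 32)  and  S = (sign_extend_inreg X, i32)
// exist, a test of T against zero is replaced by a test of S, which the
// LTGFR instruction can perform while also producing the extended value.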
3047static void adjustForLTGFR(Comparison &C) {
3048 // Check for a comparison between (shl X, 32) and 0.
3049 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3050 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3051 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3052 if (C1 && C1->getZExtValue() == 32) {
3053 SDValue ShlOp0 = C.Op0.getOperand(0);
3054 // See whether X has any SIGN_EXTEND_INREG uses.
3055 for (SDNode *N : ShlOp0->users()) {
3056 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3057 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3058 C.Op0 = SDValue(N, 0);
3059 return;
3060 }
3061 }
3062 }
3063 }
3064}
3065
3066// If C compares the truncation of an extending load, try to compare
3067// the untruncated value instead. This exposes more opportunities to
3068// reuse CC.
3069static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3070 Comparison &C) {
3071 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3072 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3073 C.Op1.getOpcode() == ISD::Constant &&
3074 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3075 C.Op1->getAsZExtVal() == 0) {
3076 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3077 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3078 C.Op0.getValueSizeInBits().getFixedValue()) {
3079 unsigned Type = L->getExtensionType();
3080 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3081 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3082 C.Op0 = C.Op0.getOperand(0);
3083 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3084 }
3085 }
3086 }
3087}
3088
3089// Return true if shift operation N has an in-range constant shift value.
3090// Store it in ShiftVal if so.
3091static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3092 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3093 if (!Shift)
3094 return false;
3095
3096 uint64_t Amount = Shift->getZExtValue();
3097 if (Amount >= N.getValueSizeInBits())
3098 return false;
3099
3100 ShiftVal = Amount;
3101 return true;
3102}
3103
3104// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3105// instruction and whether the CC value is descriptive enough to handle
3106// a comparison of type Opcode between the AND result and CmpVal.
3107// CCMask says which comparison result is being tested and BitSize is
3108// the number of bits in the operands. If TEST UNDER MASK can be used,
3109// return the corresponding CC mask, otherwise return 0.
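// For example, "(X & 0xff00) == 0" can be implemented as a TEST UNDER MASK
// (e.g. TMLL X, 0xff00) followed by a branch on CCMASK_TM_ALL_0, with no
// separate comparison instruction.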
3110static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3111 uint64_t Mask, uint64_t CmpVal,
3112 unsigned ICmpType) {
3113 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3114
3115 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3116 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3117 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3118 return 0;
3119
3120 // Work out the masks for the lowest and highest bits.
3122 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3123
3124 // Signed ordered comparisons are effectively unsigned if the sign
3125 // bit is dropped.
3126 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3127
3128 // Check for equality comparisons with 0, or the equivalent.
3129 if (CmpVal == 0) {
3130 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3132 if (CCMask == SystemZ::CCMASK_CMP_NE)
3134 }
3135 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3136 if (CCMask == SystemZ::CCMASK_CMP_LT)
3138 if (CCMask == SystemZ::CCMASK_CMP_GE)
3140 }
3141 if (EffectivelyUnsigned && CmpVal < Low) {
3142 if (CCMask == SystemZ::CCMASK_CMP_LE)
3144 if (CCMask == SystemZ::CCMASK_CMP_GT)
3146 }
3147
3148 // Check for equality comparisons with the mask, or the equivalent.
3149 if (CmpVal == Mask) {
3150 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3152 if (CCMask == SystemZ::CCMASK_CMP_NE)
3154 }
3155 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3156 if (CCMask == SystemZ::CCMASK_CMP_GT)
3158 if (CCMask == SystemZ::CCMASK_CMP_LE)
3160 }
3161 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3162 if (CCMask == SystemZ::CCMASK_CMP_GE)
3164 if (CCMask == SystemZ::CCMASK_CMP_LT)
3166 }
3167
3168 // Check for ordered comparisons with the top bit.
3169 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3170 if (CCMask == SystemZ::CCMASK_CMP_LE)
3172 if (CCMask == SystemZ::CCMASK_CMP_GT)
3174 }
3175 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3176 if (CCMask == SystemZ::CCMASK_CMP_LT)
3178 if (CCMask == SystemZ::CCMASK_CMP_GE)
3180 }
3181
3182 // If there are just two bits, we can do equality checks for Low and High
3183 // as well.
3184 if (Mask == Low + High) {
3185 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3187 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3189 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3191 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3193 }
3194
3195 // Looks like we've exhausted our options.
3196 return 0;
3197}
3198
3199// See whether C can be implemented as a TEST UNDER MASK instruction.
3200// Update the arguments with the TM version if so.
3202 Comparison &C) {
3203 // Use VECTOR TEST UNDER MASK for i128 operations.
3204 if (C.Op0.getValueType() == MVT::i128) {
3205 // We can use VTM for EQ/NE comparisons of x & y against 0.
3206 if (C.Op0.getOpcode() == ISD::AND &&
3207 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3208 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3209 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3210 if (Mask && Mask->getAPIntValue() == 0) {
3211 C.Opcode = SystemZISD::VTM;
3212 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3213 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3214 C.CCValid = SystemZ::CCMASK_VCMP;
3215 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3216 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3217 else
3218 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3219 }
3220 }
3221 return;
3222 }
3223
3224 // Check that we have a comparison with a constant.
3225 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3226 if (!ConstOp1)
3227 return;
3228 uint64_t CmpVal = ConstOp1->getZExtValue();
3229
3230 // Check whether the nonconstant input is an AND with a constant mask.
3231 Comparison NewC(C);
3232 uint64_t MaskVal;
3233 ConstantSDNode *Mask = nullptr;
3234 if (C.Op0.getOpcode() == ISD::AND) {
3235 NewC.Op0 = C.Op0.getOperand(0);
3236 NewC.Op1 = C.Op0.getOperand(1);
3237 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3238 if (!Mask)
3239 return;
3240 MaskVal = Mask->getZExtValue();
3241 } else {
3242 // There is no instruction to compare with a 64-bit immediate
3243 // so use TMHH instead if possible. We need an unsigned ordered
3244 // comparison with an i64 immediate.
3245 if (NewC.Op0.getValueType() != MVT::i64 ||
3246 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3247 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3248 NewC.ICmpType == SystemZICMP::SignedOnly)
3249 return;
3250 // Convert LE and GT comparisons into LT and GE.
3251 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3252 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3253 if (CmpVal == uint64_t(-1))
3254 return;
3255 CmpVal += 1;
3256 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3257 }
3258 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3259 // be masked off without changing the result.
3260 MaskVal = -(CmpVal & -CmpVal);
3261 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3262 }
3263 if (!MaskVal)
3264 return;
3265
3266 // Check whether the combination of mask, comparison value and comparison
3267 // type are suitable.
3268 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3269 unsigned NewCCMask, ShiftVal;
3270 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3271 NewC.Op0.getOpcode() == ISD::SHL &&
3272 isSimpleShift(NewC.Op0, ShiftVal) &&
3273 (MaskVal >> ShiftVal != 0) &&
3274 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3275 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3276 MaskVal >> ShiftVal,
3277 CmpVal >> ShiftVal,
3278 SystemZICMP::Any))) {
3279 NewC.Op0 = NewC.Op0.getOperand(0);
3280 MaskVal >>= ShiftVal;
3281 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3282 NewC.Op0.getOpcode() == ISD::SRL &&
3283 isSimpleShift(NewC.Op0, ShiftVal) &&
3284 (MaskVal << ShiftVal != 0) &&
3285 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3286 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3287 MaskVal << ShiftVal,
3288 CmpVal << ShiftVal,
3290 NewC.Op0 = NewC.Op0.getOperand(0);
3291 MaskVal <<= ShiftVal;
3292 } else {
3293 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3294 NewC.ICmpType);
3295 if (!NewCCMask)
3296 return;
3297 }
3298
3299 // Go ahead and make the change.
3300 C.Opcode = SystemZISD::TM;
3301 C.Op0 = NewC.Op0;
3302 if (Mask && Mask->getZExtValue() == MaskVal)
3303 C.Op1 = SDValue(Mask, 0);
3304 else
3305 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3306 C.CCValid = SystemZ::CCMASK_TM;
3307 C.CCMask = NewCCMask;
3308}
3309
3310// Implement i128 comparison in vector registers.
3311static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3312 Comparison &C) {
3313 if (C.Opcode != SystemZISD::ICMP)
3314 return;
3315 if (C.Op0.getValueType() != MVT::i128)
3316 return;
3317
3318 // Recognize vector comparison reductions.
3319 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3320 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3321 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3322 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3323 bool CmpNull = isNullConstant(C.Op1);
3324 SDValue Src = peekThroughBitcasts(C.Op0);
3325 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3326 Src = Src.getOperand(0);
3327 CmpNull = !CmpNull;
3328 }
3329 unsigned Opcode = 0;
3330 if (Src.hasOneUse()) {
3331 switch (Src.getOpcode()) {
3332 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3333 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3334 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3335 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3336 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3337 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3338 default: break;
3339 }
3340 }
3341 if (Opcode) {
3342 C.Opcode = Opcode;
3343 C.Op0 = Src->getOperand(0);
3344 C.Op1 = Src->getOperand(1);
3345 C.CCValid = SystemZ::CCMASK_VCMP;
3347 if (!CmpEq)
3348 C.CCMask ^= C.CCValid;
3349 return;
3350 }
3351 }
3352
3353 // Everything below here is not useful if we have native i128 compares.
3354 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3355 return;
3356
3357 // (In-)Equality comparisons can be implemented via VCEQGS.
3358 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3359 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3360 C.Opcode = SystemZISD::VICMPES;
3361 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3362 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3363 C.CCValid = SystemZ::CCMASK_VCMP;
3364 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3365 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3366 else
3367 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3368 return;
3369 }
3370
3371 // Normalize other comparisons to GT.
3372 bool Swap = false, Invert = false;
3373 switch (C.CCMask) {
3374 case SystemZ::CCMASK_CMP_GT: break;
3375 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3376 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3377 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3378 default: llvm_unreachable("Invalid integer condition!");
3379 }
3380 if (Swap)
3381 std::swap(C.Op0, C.Op1);
3382
3383 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3384 C.Opcode = SystemZISD::UCMP128HI;
3385 else
3386 C.Opcode = SystemZISD::SCMP128HI;
3387 C.CCValid = SystemZ::CCMASK_ANY;
3388 C.CCMask = SystemZ::CCMASK_1;
3389
3390 if (Invert)
3391 C.CCMask ^= C.CCValid;
3392}
3393
3394// See whether the comparison argument contains a redundant AND
3395// and remove it if so. This sometimes happens due to the generic
3396// BRCOND expansion.
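// For example, BRCOND expansion often produces "(X & 1) != 0" even when
// computeKnownBits already shows that X has no bits set outside the mask;
// in that case the AND is redundant and X can be compared directly.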
3398 Comparison &C) {
3399 if (C.Op0.getOpcode() != ISD::AND)
3400 return;
3401 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3402 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3403 return;
3404 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3405 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3406 return;
3407
3408 C.Op0 = C.Op0.getOperand(0);
3409}
3410
3411// Return a Comparison that tests the condition-code result of intrinsic
3412// node Call against constant integer CC using comparison code Cond.
3413// Opcode is the opcode of the SystemZISD operation for the intrinsic
3414// and CCValid is the set of possible condition-code results.
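// For example, for Cond == SETEQ and CC == 1 the branch should be taken
// exactly when the intrinsic sets CC to 1, so the mask is
// 1 << (3 - 1) == 0b0100 before being restricted to CCValid.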
3415static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3416 SDValue Call, unsigned CCValid, uint64_t CC,
3418 Comparison C(Call, SDValue(), SDValue());
3419 C.Opcode = Opcode;
3420 C.CCValid = CCValid;
3421 if (Cond == ISD::SETEQ)
3422 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3423 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3424 else if (Cond == ISD::SETNE)
3425 // ...and the inverse of that.
3426 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3427 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3428 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3429 // always true for CC>3.
3430 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3431 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3432 // ...and the inverse of that.
3433 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3434 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3435 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3436 // always true for CC>3.
3437 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3438 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3439 // ...and the inverse of that.
3440 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3441 else
3442 llvm_unreachable("Unexpected integer comparison type");
3443 C.CCMask &= CCValid;
3444 return C;
3445}
3446
3447 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3448static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3449 ISD::CondCode Cond, const SDLoc &DL,
3450 SDValue Chain = SDValue(),
3451 bool IsSignaling = false) {
3452 if (CmpOp1.getOpcode() == ISD::Constant) {
3453 assert(!Chain);
3454 unsigned Opcode, CCValid;
3455 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3456 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3457 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3458 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3459 CmpOp1->getAsZExtVal(), Cond);
3460 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3461 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3462 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3463 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3464 CmpOp1->getAsZExtVal(), Cond);
3465 }
3466 Comparison C(CmpOp0, CmpOp1, Chain);
3467 C.CCMask = CCMaskForCondCode(Cond);
3468 if (C.Op0.getValueType().isFloatingPoint()) {
3469 C.CCValid = SystemZ::CCMASK_FCMP;
3470 if (!C.Chain)
3471 C.Opcode = SystemZISD::FCMP;
3472 else if (!IsSignaling)
3473 C.Opcode = SystemZISD::STRICT_FCMP;
3474 else
3475 C.Opcode = SystemZISD::STRICT_FCMPS;
3477 } else {
3478 assert(!C.Chain);
3479 C.CCValid = SystemZ::CCMASK_ICMP;
3480 C.Opcode = SystemZISD::ICMP;
3481 // Choose the type of comparison. Equality and inequality tests can
3482 // use either signed or unsigned comparisons. The choice also doesn't
3483 // matter if both sign bits are known to be clear. In those cases we
3484 // want to give the main isel code the freedom to choose whichever
3485 // form fits best.
3486 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3487 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3488 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3489 C.ICmpType = SystemZICMP::Any;
3490 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3491 C.ICmpType = SystemZICMP::UnsignedOnly;
3492 else
3493 C.ICmpType = SystemZICMP::SignedOnly;
3494 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3495 adjustForRedundantAnd(DAG, DL, C);
3496 adjustZeroCmp(DAG, DL, C);
3497 adjustSubwordCmp(DAG, DL, C);
3498 adjustForSubtraction(DAG, DL, C);
3500 adjustICmpTruncate(DAG, DL, C);
3501 }
3502
3503 if (shouldSwapCmpOperands(C)) {
3504 std::swap(C.Op0, C.Op1);
3505 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3506 }
3507
3509 adjustICmp128(DAG, DL, C);
3510 return C;
3511}
3512
3513// Emit the comparison instruction described by C.
3514static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3515 if (!C.Op1.getNode()) {
3516 SDNode *Node;
3517 switch (C.Op0.getOpcode()) {
3519 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3520 return SDValue(Node, 0);
3522 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3523 return SDValue(Node, Node->getNumValues() - 1);
3524 default:
3525 llvm_unreachable("Invalid comparison operands");
3526 }
3527 }
3528 if (C.Opcode == SystemZISD::ICMP)
3529 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3530 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3531 if (C.Opcode == SystemZISD::TM) {
3532 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3534 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3535 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3536 }
3537 if (C.Opcode == SystemZISD::VICMPES ||
3538 C.Opcode == SystemZISD::VICMPHS ||
3539 C.Opcode == SystemZISD::VICMPHLS ||
3540 C.Opcode == SystemZISD::VFCMPES ||
3541 C.Opcode == SystemZISD::VFCMPHS ||
3542 C.Opcode == SystemZISD::VFCMPHES) {
3543 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3544 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3545 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3546 return SDValue(Val.getNode(), 1);
3547 }
3548 if (C.Chain) {
3549 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3550 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3551 }
3552 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3553}
3554
3555// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3556// 64 bits. Extend is the extension type to use. Store the high part
3557// in Hi and the low part in Lo.
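// Conceptually, for the zero-extending case this computes:
//   uint64_t Prod = (uint64_t)(uint32_t)LHS * (uint64_t)(uint32_t)RHS;
//   uint32_t Hi = (uint32_t)(Prod >> 32);
//   uint32_t Lo = (uint32_t)Prod;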
3558static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3559 SDValue Op0, SDValue Op1, SDValue &Hi,
3560 SDValue &Lo) {
3561 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3562 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3563 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3564 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3565 DAG.getConstant(32, DL, MVT::i64));
3566 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3567 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3568}
3569
3570// Lower a binary operation that produces two VT results, one in each
3571// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3572// and Opcode performs the GR128 operation. Store the even register result
3573// in Even and the odd register result in Odd.
3574static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3575 unsigned Opcode, SDValue Op0, SDValue Op1,
3576 SDValue &Even, SDValue &Odd) {
3577 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3578 bool Is32Bit = is32Bit(VT);
3579 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3580 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3581}
3582
3583// Return an i32 value that is 1 if the CC value produced by CCReg is
3584// in the mask CCMask and 0 otherwise. CC is known to have a value
3585// in CCValid, so other values can be ignored.
3586static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3587 unsigned CCValid, unsigned CCMask) {
3588 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3589 DAG.getConstant(0, DL, MVT::i32),
3590 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3591 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3592 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3593}
3594
3595 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3596// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3597// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3598// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3599// floating-point comparisons.
3601static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3602 switch (CC) {
3603 case ISD::SETOEQ:
3604 case ISD::SETEQ:
3605 switch (Mode) {
3606 case CmpMode::Int: return SystemZISD::VICMPE;
3607 case CmpMode::FP: return SystemZISD::VFCMPE;
3608 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3609 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3610 }
3611 llvm_unreachable("Bad mode");
3612
3613 case ISD::SETOGE:
3614 case ISD::SETGE:
3615 switch (Mode) {
3616 case CmpMode::Int: return 0;
3617 case CmpMode::FP: return SystemZISD::VFCMPHE;
3618 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3619 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3620 }
3621 llvm_unreachable("Bad mode");
3622
3623 case ISD::SETOGT:
3624 case ISD::SETGT:
3625 switch (Mode) {
3626 case CmpMode::Int: return SystemZISD::VICMPH;
3627 case CmpMode::FP: return SystemZISD::VFCMPH;
3628 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3629 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3630 }
3631 llvm_unreachable("Bad mode");
3632
3633 case ISD::SETUGT:
3634 switch (Mode) {
3635 case CmpMode::Int: return SystemZISD::VICMPHL;
3636 case CmpMode::FP: return 0;
3637 case CmpMode::StrictFP: return 0;
3638 case CmpMode::SignalingFP: return 0;
3639 }
3640 llvm_unreachable("Bad mode");
3641
3642 default:
3643 return 0;
3644 }
3645}
3646
3647// Return the SystemZISD vector comparison operation for CC or its inverse,
3648// or 0 if neither can be done directly. Indicate in Invert whether the
3649// result is for the inverse of CC. Mode is as above.
3651 bool &Invert) {
3652 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3653 Invert = false;
3654 return Opcode;
3655 }
3656
3657 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3658 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3659 Invert = true;
3660 return Opcode;
3661 }
3662
3663 return 0;
3664}
3665
3666// Return a v2f64 that contains the extended form of elements Start and Start+1
3667// of v4f32 value Op. If Chain is nonnull, return the strict form.
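// For Start == 0 the shuffle mask { 0, -1, 1, -1 } moves elements 0 and 1
// of the v4f32 into the even lanes, which VEXTEND (or its strict form)
// then widens to a v2f64.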
3668static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3669 SDValue Op, SDValue Chain) {
3670 int Mask[] = { Start, -1, Start + 1, -1 };
3671 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3672 if (Chain) {
3673 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3674 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3675 }
3676 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3677}
3678
3679// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3680// producing a result of type VT. If Chain is nonnull, return the strict form.
3681SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3682 const SDLoc &DL, EVT VT,
3683 SDValue CmpOp0,
3684 SDValue CmpOp1,
3685 SDValue Chain) const {
3686 // There is no hardware support for v4f32 (unless we have the vector
3687 // enhancements facility 1), so extend the vector into two v2f64s
3688 // and compare those.
3689 if (CmpOp0.getValueType() == MVT::v4f32 &&
3690 !Subtarget.hasVectorEnhancements1()) {
3691 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3692 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3693 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3694 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3695 if (Chain) {
3696 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3697 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3698 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3699 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3700 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3701 H1.getValue(1), L1.getValue(1),
3702 HRes.getValue(1), LRes.getValue(1) };
3703 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3704 SDValue Ops[2] = { Res, NewChain };
3705 return DAG.getMergeValues(Ops, DL);
3706 }
3707 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3708 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3709 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3710 }
3711 if (Chain) {
3712 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3713 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3714 }
3715 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3716}
3717
3718// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3719// an integer mask of type VT. If Chain is nonnull, we have a strict
3720// floating-point comparison. If in addition IsSignaling is true, we have
3721// a strict signaling floating-point comparison.
3722SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3723 const SDLoc &DL, EVT VT,
3724 ISD::CondCode CC,
3725 SDValue CmpOp0,
3726 SDValue CmpOp1,
3727 SDValue Chain,
3728 bool IsSignaling) const {
3729 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3730 assert (!Chain || IsFP);
3731 assert (!IsSignaling || Chain);
3732 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3733 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3734 bool Invert = false;
3735 SDValue Cmp;
3736 switch (CC) {
3737 // Handle tests for order using (or (ogt y x) (oge x y)).
3738 case ISD::SETUO:
3739 Invert = true;
3740 [[fallthrough]];
3741 case ISD::SETO: {
3742 assert(IsFP && "Unexpected integer comparison");
3743 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3744 DL, VT, CmpOp1, CmpOp0, Chain);
3745 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3746 DL, VT, CmpOp0, CmpOp1, Chain);
3747 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3748 if (Chain)
3749 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3750 LT.getValue(1), GE.getValue(1));
3751 break;
3752 }
3753
3754 // Handle <> tests using (or (ogt y x) (ogt x y)).
3755 case ISD::SETUEQ:
3756 Invert = true;
3757 [[fallthrough]];
3758 case ISD::SETONE: {
3759 assert(IsFP && "Unexpected integer comparison");
3760 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3761 DL, VT, CmpOp1, CmpOp0, Chain);
3762 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3763 DL, VT, CmpOp0, CmpOp1, Chain);
3764 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3765 if (Chain)
3766 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3767 LT.getValue(1), GT.getValue(1));
3768 break;
3769 }
3770
3771 // Otherwise a single comparison is enough. It doesn't really
3772 // matter whether we try the inversion or the swap first, since
3773 // there are no cases where both work.
3774 default:
3775 // Optimize sign-bit comparisons to signed compares.
3776 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3777 ISD::isConstantSplatVectorAllZeros(CmpOp1.getNode())) {
3778 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3779 APInt Mask;
3780 if (CmpOp0.getOpcode() == ISD::AND
3781 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3782 && Mask == APInt::getSignMask(EltSize)) {
3783 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3784 CmpOp0 = CmpOp0.getOperand(0);
3785 }
3786 }
3787 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3788 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3789 else {
3790 CC = ISD::getSetCCSwappedOperands(CC);
3791 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3792 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3793 else
3794 llvm_unreachable("Unhandled comparison");
3795 }
3796 if (Chain)
3797 Chain = Cmp.getValue(1);
3798 break;
3799 }
3800 if (Invert) {
3801 SDValue Mask =
3802 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3803 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3804 }
3805 if (Chain && Chain.getNode() != Cmp.getNode()) {
3806 SDValue Ops[2] = { Cmp, Chain };
3807 Cmp = DAG.getMergeValues(Ops, DL);
3808 }
3809 return Cmp;
3810}
3811
3812SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3813 SelectionDAG &DAG) const {
3814 SDValue CmpOp0 = Op.getOperand(0);
3815 SDValue CmpOp1 = Op.getOperand(1);
3816 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3817 SDLoc DL(Op);
3818 EVT VT = Op.getValueType();
3819 if (VT.isVector())
3820 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3821
3822 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3823 SDValue CCReg = emitCmp(DAG, DL, C);
3824 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3825}
3826
3827SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3828 SelectionDAG &DAG,
3829 bool IsSignaling) const {
3830 SDValue Chain = Op.getOperand(0);
3831 SDValue CmpOp0 = Op.getOperand(1);
3832 SDValue CmpOp1 = Op.getOperand(2);
3833 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3834 SDLoc DL(Op);
3835 EVT VT = Op.getNode()->getValueType(0);
3836 if (VT.isVector()) {
3837 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3838 Chain, IsSignaling);
3839 return Res.getValue(Op.getResNo());
3840 }
3841
3842 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3843 SDValue CCReg = emitCmp(DAG, DL, C);
3844 CCReg->setFlags(Op->getFlags());
3845 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3846 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3847 return DAG.getMergeValues(Ops, DL);
3848}
3849
3850SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3851 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3852 SDValue CmpOp0 = Op.getOperand(2);
3853 SDValue CmpOp1 = Op.getOperand(3);
3854 SDValue Dest = Op.getOperand(4);
3855 SDLoc DL(Op);
3856
3857 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3858 SDValue CCReg = emitCmp(DAG, DL, C);
3859 return DAG.getNode(
3860 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3861 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3862 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3863}
3864
3865// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3866// allowing Pos and Neg to be wider than CmpOp.
3867static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3868 return (Neg.getOpcode() == ISD::SUB &&
3869 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3870 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3871 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3872 Pos.getOperand(0) == CmpOp)));
3873}
3874
3875// Return the absolute or negative absolute of Op; IsNegative decides which.
3876static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3877 bool IsNegative) {
3878 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3879 if (IsNegative)
3880 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3881 DAG.getConstant(0, DL, Op.getValueType()), Op);
3882 return Op;
3883}
3884
3885static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
3886 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3887 EVT VT = MVT::i128;
3888 unsigned Op;
3889
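// Canonicalize the comparison: NE/GE/LE are folded away by swapping the select
// operands and complementing the CC mask, and LT becomes GT by swapping the
// compare operands, so only EQ and (signed or unsigned) GT remain below.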
3890 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3891 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3892 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3893 std::swap(TrueOp, FalseOp);
3894 C.CCMask ^= C.CCValid;
3895 }
3896 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3897 std::swap(C.Op0, C.Op1);
3898 C.CCMask = SystemZ::CCMASK_CMP_GT;
3899 }
3900 switch (C.CCMask) {
3901 case SystemZ::CCMASK_CMP_EQ:
3902 Op = SystemZISD::VICMPE;
3903 break;
3904 case SystemZ::CCMASK_CMP_GT:
3905 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3906 Op = SystemZISD::VICMPHL;
3907 else
3908 Op = SystemZISD::VICMPH;
3909 break;
3910 default:
3911 llvm_unreachable("Unhandled comparison");
3912 break;
3913 }
3914
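// The comparison yields an all-ones or all-zeros i128 mask, so the select is
// done by ANDing each operand with the mask (or its complement) and ORing.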
3915 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
3916 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
3917 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
3918 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
3919}
3920
3921SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3922 SelectionDAG &DAG) const {
3923 SDValue CmpOp0 = Op.getOperand(0);
3924 SDValue CmpOp1 = Op.getOperand(1);
3925 SDValue TrueOp = Op.getOperand(2);
3926 SDValue FalseOp = Op.getOperand(3);
3927 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3928 SDLoc DL(Op);
3929
3930 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
3931 // legalizer, as it will be handled according to the type of the resulting
3932 // value. Extend them here if needed.
3933 if (CmpOp0.getSimpleValueType() == MVT::f16) {
3934 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
3935 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
3936 }
3937
3938 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3939
3940 // Check for absolute and negative-absolute selections, including those
3941 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3942 // This check supplements the one in DAGCombiner.
3943 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3944 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3945 C.Op1.getOpcode() == ISD::Constant &&
3946 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3947 C.Op1->getAsZExtVal() == 0) {
3948 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3949 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3950 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3951 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3952 }
3953
3954 if (Subtarget.hasVectorEnhancements3() &&
3955 C.Opcode == SystemZISD::ICMP &&
3956 C.Op0.getValueType() == MVT::i128 &&
3957 TrueOp.getValueType() == MVT::i128) {
3958 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
3959 }
3960
3961 SDValue CCReg = emitCmp(DAG, DL, C);
3962 SDValue Ops[] = {TrueOp, FalseOp,
3963 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3964 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3965
3966 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3967}
3968
3969SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3970 SelectionDAG &DAG) const {
3971 SDLoc DL(Node);
3972 const GlobalValue *GV = Node->getGlobal();
3973 int64_t Offset = Node->getOffset();
3974 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3975 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3976
3977 SDValue Result;
3978 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3979 if (isInt<32>(Offset)) {
3980 // Assign anchors at 1<<12 byte boundaries.
3981 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3982 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3983 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3984
3985 // The offset can be folded into the address if it is aligned to a
3986 // halfword.
3987 Offset -= Anchor;
3988 if (Offset != 0 && (Offset & 1) == 0) {
3989 SDValue Full =
3990 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3991 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3992 Offset = 0;
3993 }
3994 } else {
3995 // Conservatively load a constant offset greater than 32 bits into a
3996 // register below.
3997 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3998 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3999 }
4000 } else if (Subtarget.isTargetELF()) {
4001 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4002 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4003 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4004 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4005 } else if (Subtarget.isTargetzOS()) {
4006 Result = getADAEntry(DAG, GV, DL, PtrVT);
4007 } else
4008 llvm_unreachable("Unexpected Subtarget");
4009
4010 // If there was a non-zero offset that we didn't fold, create an explicit
4011 // addition for it.
4012 if (Offset != 0)
4013 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4014 DAG.getSignedConstant(Offset, DL, PtrVT));
4015
4016 return Result;
4017}
4018
4019SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4020 SelectionDAG &DAG,
4021 unsigned Opcode,
4022 SDValue GOTOffset) const {
4023 SDLoc DL(Node);
4024 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4025 SDValue Chain = DAG.getEntryNode();
4026 SDValue Glue;
4027
4028 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4029 CallingConv::GHC)
4030 report_fatal_error("In GHC calling convention TLS is not supported");
4031
4032 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4033 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
4034 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4035 Glue = Chain.getValue(1);
4036 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4037 Glue = Chain.getValue(1);
4038
4039 // The first call operand is the chain and the second is the TLS symbol.
4040 SmallVector<SDValue, 8> Ops;
4041 Ops.push_back(Chain);
4042 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4043 Node->getValueType(0),
4044 0, 0));
4045
4046 // Add argument registers to the end of the list so that they are
4047 // known live into the call.
4048 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4049 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4050
4051 // Add a register mask operand representing the call-preserved registers.
4052 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4053 const uint32_t *Mask =
4054 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4055 assert(Mask && "Missing call preserved mask for calling convention");
4056 Ops.push_back(DAG.getRegisterMask(Mask));
4057
4058 // Glue the call to the argument copies.
4059 Ops.push_back(Glue);
4060
4061 // Emit the call.
4062 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4063 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4064 Glue = Chain.getValue(1);
4065
4066 // Copy the return value from %r2.
4067 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4068}
4069
4070SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4071 SelectionDAG &DAG) const {
4072 SDValue Chain = DAG.getEntryNode();
4073 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4074
4075 // The high part of the thread pointer is in access register 0.
4076 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4077 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4078
4079 // The low part of the thread pointer is in access register 1.
4080 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4081 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4082
4083 // Merge them into a single 64-bit address.
4084 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4085 DAG.getConstant(32, DL, PtrVT));
4086 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4087}
4088
4089SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4090 SelectionDAG &DAG) const {
4091 if (DAG.getTarget().useEmulatedTLS())
4092 return LowerToTLSEmulatedModel(Node, DAG);
4093 SDLoc DL(Node);
4094 const GlobalValue *GV = Node->getGlobal();
4095 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4096 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4097
4098 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4099 CallingConv::GHC)
4100 report_fatal_error("In GHC calling convention TLS is not supported");
4101
4102 SDValue TP = lowerThreadPointer(DL, DAG);
4103
4104 // Get the offset of GA from the thread pointer, based on the TLS model.
4105 SDValue Offset;
4106 switch (model) {
4107 case TLSModel::GeneralDynamic: {
4108 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4109 SystemZConstantPoolValue *CPV =
4110 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
4111
4112 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4113 Offset = DAG.getLoad(
4114 PtrVT, DL, DAG.getEntryNode(), Offset,
4115 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4116
4117 // Call __tls_get_offset to retrieve the offset.
4118 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4119 break;
4120 }
4121
4122 case TLSModel::LocalDynamic: {
4123 // Load the GOT offset of the module ID.
4124 SystemZConstantPoolValue *CPV =
4125 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
4126
4127 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4128 Offset = DAG.getLoad(
4129 PtrVT, DL, DAG.getEntryNode(), Offset,
4130 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4131
4132 // Call __tls_get_offset to retrieve the module base offset.
4133 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4134
4135 // Note: The SystemZLDCleanupPass will remove redundant computations
4136 // of the module base offset. Count total number of local-dynamic
4137 // accesses to trigger execution of that pass.
4138 SystemZMachineFunctionInfo* MFI =
4139 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4140 MFI->incNumLocalDynamicTLSAccesses();
4141
4142 // Add the per-symbol offset.
4143 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
4144
4145 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4146 DTPOffset = DAG.getLoad(
4147 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
4148 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4149
4150 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4151 break;
4152 }
4153
4154 case TLSModel::InitialExec: {
4155 // Load the offset from the GOT.
4156 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
4157 SystemZII::MO_INDNTPOFF);
4158 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4159 Offset =
4160 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
4161 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4162 break;
4163 }
4164
4165 case TLSModel::LocalExec: {
4166 // Force the offset into the constant pool and load it from there.
4167 SystemZConstantPoolValue *CPV =
4168 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
4169
4170 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4171 Offset = DAG.getLoad(
4172 PtrVT, DL, DAG.getEntryNode(), Offset,
4173 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4174 break;
4175 }
4176 }
4177
4178 // Add the base and offset together.
4179 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4180}
4181
4182SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4183 SelectionDAG &DAG) const {
4184 SDLoc DL(Node);
4185 const BlockAddress *BA = Node->getBlockAddress();
4186 int64_t Offset = Node->getOffset();
4187 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4188
4189 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4190 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4191 return Result;
4192}
4193
4194SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4195 SelectionDAG &DAG) const {
4196 SDLoc DL(JT);
4197 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4198 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4199
4200 // Use LARL to load the address of the table.
4201 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4202}
4203
4204SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4205 SelectionDAG &DAG) const {
4206 SDLoc DL(CP);
4207 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4208
4209 SDValue Result;
4210 if (CP->isMachineConstantPoolEntry())
4211 Result =
4212 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4213 else
4214 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4215 CP->getOffset());
4216
4217 // Use LARL to load the address of the constant pool entry.
4218 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4219}
4220
4221SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4222 SelectionDAG &DAG) const {
4223 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4224 MachineFunction &MF = DAG.getMachineFunction();
4225 MachineFrameInfo &MFI = MF.getFrameInfo();
4226 MFI.setFrameAddressIsTaken(true);
4227
4228 SDLoc DL(Op);
4229 unsigned Depth = Op.getConstantOperandVal(0);
4230 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4231
4232 // By definition, the frame address is the address of the back chain. (In
4233 // the case of packed stack without backchain, return the address where the
4234 // backchain would have been stored. This will either be an unused space or
4235 // contain a saved register).
4236 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4237 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4238
4239 if (Depth > 0) {
4240 // FIXME The frontend should detect this case.
4241 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4242 report_fatal_error("Unsupported stack frame traversal count");
4243
4244 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4245 while (Depth--) {
4246 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4247 MachinePointerInfo());
4248 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4249 }
4250 }
4251
4252 return BackChain;
4253}
4254
4255SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4256 SelectionDAG &DAG) const {
4257 MachineFunction &MF = DAG.getMachineFunction();
4258 MachineFrameInfo &MFI = MF.getFrameInfo();
4259 MFI.setReturnAddressIsTaken(true);
4260
4261 SDLoc DL(Op);
4262 unsigned Depth = Op.getConstantOperandVal(0);
4263 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4264
4265 if (Depth > 0) {
4266 // FIXME The frontend should detect this case.
4267 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4268 report_fatal_error("Unsupported stack frame traversal count");
4269
4270 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4271 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4272 int Offset = TFL->getReturnAddressOffset(MF);
4273 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4274 DAG.getSignedConstant(Offset, DL, PtrVT));
4275 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4276 MachinePointerInfo());
4277 }
4278
4279 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4280 // implicit live-in.
4281 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4282 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
4283 &SystemZ::GR64BitRegClass);
4284 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4285}
4286
4287SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4288 SelectionDAG &DAG) const {
4289 SDLoc DL(Op);
4290 SDValue In = Op.getOperand(0);
4291 EVT InVT = In.getValueType();
4292 EVT ResVT = Op.getValueType();
4293
4294 // Convert loads directly. This is normally done by DAGCombiner,
4295 // but we need this case for bitcasts that are created during lowering
4296 // and which are then lowered themselves.
4297 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4298 if (ISD::isNormalLoad(LoadN)) {
4299 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4300 LoadN->getBasePtr(), LoadN->getMemOperand());
4301 // Update the chain uses.
4302 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4303 return NewLoad;
4304 }
4305
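// i32<->f32 bitcasts are handled by placing the 32-bit value in the high word
// of a 64-bit value: insert into or extract from subreg_h32 and bitcast the
// containing 64-bit value.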
4306 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4307 SDValue In64;
4308 if (Subtarget.hasHighWord()) {
4309 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4310 MVT::i64);
4311 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4312 MVT::i64, SDValue(U64, 0), In);
4313 } else {
4314 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4315 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4316 DAG.getConstant(32, DL, MVT::i64));
4317 }
4318 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4319 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4320 DL, MVT::f32, Out64);
4321 }
4322 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4323 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4324 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4325 MVT::f64, SDValue(U64, 0), In);
4326 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4327 if (Subtarget.hasHighWord())
4328 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4329 MVT::i32, Out64);
4330 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4331 DAG.getConstant(32, DL, MVT::i64));
4332 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4333 }
4334 llvm_unreachable("Unexpected bitcast combination");
4335}
4336
4337SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4338 SelectionDAG &DAG) const {
4339
4340 if (Subtarget.isTargetXPLINK64())
4341 return lowerVASTART_XPLINK(Op, DAG);
4342 else
4343 return lowerVASTART_ELF(Op, DAG);
4344}
4345
4346SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4347 SelectionDAG &DAG) const {
4348 MachineFunction &MF = DAG.getMachineFunction();
4349 SystemZMachineFunctionInfo *FuncInfo =
4350 MF.getInfo<SystemZMachineFunctionInfo>();
4351
4352 SDLoc DL(Op);
4353
4354 // vastart just stores the address of the VarArgsFrameIndex slot into the
4355 // memory location argument.
4356 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4357 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4358 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4359 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4360 MachinePointerInfo(SV));
4361}
4362
4363SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4364 SelectionDAG &DAG) const {
4365 MachineFunction &MF = DAG.getMachineFunction();
4366 SystemZMachineFunctionInfo *FuncInfo =
4367 MF.getInfo<SystemZMachineFunctionInfo>();
4368 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4369
4370 SDValue Chain = Op.getOperand(0);
4371 SDValue Addr = Op.getOperand(1);
4372 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4373 SDLoc DL(Op);
4374
4375 // The initial values of each field.
4376 const unsigned NumFields = 4;
4377 SDValue Fields[NumFields] = {
4378 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4379 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4380 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4381 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4382 };
4383
4384 // Store each field into its respective slot.
4385 SDValue MemOps[NumFields];
4386 unsigned Offset = 0;
4387 for (unsigned I = 0; I < NumFields; ++I) {
4388 SDValue FieldAddr = Addr;
4389 if (Offset != 0)
4390 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4391 DAG.getIntPtrConstant(Offset, DL));
4392 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4393 MachinePointerInfo(SV, Offset));
4394 Offset += 8;
4395 }
4396 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4397}
4398
4399SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4400 SelectionDAG &DAG) const {
4401 SDValue Chain = Op.getOperand(0);
4402 SDValue DstPtr = Op.getOperand(1);
4403 SDValue SrcPtr = Op.getOperand(2);
4404 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4405 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4406 SDLoc DL(Op);
4407
4408 uint32_t Sz =
4409 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4410 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4411 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4412 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4413 MachinePointerInfo(SrcSV));
4414}
4415
4416SDValue
4417SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4418 SelectionDAG &DAG) const {
4419 if (Subtarget.isTargetXPLINK64())
4420 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4421 else
4422 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4423}
4424
4425SDValue
4426SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4427 SelectionDAG &DAG) const {
4428 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4429 MachineFunction &MF = DAG.getMachineFunction();
4430 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4431 SDValue Chain = Op.getOperand(0);
4432 SDValue Size = Op.getOperand(1);
4433 SDValue Align = Op.getOperand(2);
4434 SDLoc DL(Op);
4435
4436 // If user has set the no alignment function attribute, ignore
4437 // alloca alignments.
4438 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4439
4440 uint64_t StackAlign = TFI->getStackAlignment();
4441 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4442 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4443
4444 SDValue NeededSpace = Size;
4445
4446 // Add extra space for alignment if needed.
4447 EVT PtrVT = getPointerTy(MF.getDataLayout());
4448 if (ExtraAlignSpace)
4449 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4450 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4451
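// On XPLINK, dynamic stack space is obtained by calling the @@ALCAXP runtime
// routine; the updated stack pointer is read back from the SP register below.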
4452 bool IsSigned = false;
4453 bool DoesNotReturn = false;
4454 bool IsReturnValueUsed = false;
4455 EVT VT = Op.getValueType();
4456 SDValue AllocaCall =
4457 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4458 CallingConv::C, IsSigned, DL, DoesNotReturn,
4459 IsReturnValueUsed)
4460 .first;
4461
4462 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4463 // to end of call in order to ensure it isn't broken up from the call
4464 // sequence.
4465 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4466 Register SPReg = Regs.getStackPointerRegister();
4467 Chain = AllocaCall.getValue(1);
4468 SDValue Glue = AllocaCall.getValue(2);
4469 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4470 Chain = NewSPRegNode.getValue(1);
4471
4472 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4473 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4474 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4475
4476 // Dynamically realign if needed.
4477 if (ExtraAlignSpace) {
4478 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4479 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4480 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4481 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4482 }
4483
4484 SDValue Ops[2] = {Result, Chain};
4485 return DAG.getMergeValues(Ops, DL);
4486}
4487
4488SDValue
4489SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4490 SelectionDAG &DAG) const {
4491 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4492 MachineFunction &MF = DAG.getMachineFunction();
4493 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4494 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4495
4496 SDValue Chain = Op.getOperand(0);
4497 SDValue Size = Op.getOperand(1);
4498 SDValue Align = Op.getOperand(2);
4499 SDLoc DL(Op);
4500
4501 // If user has set the no alignment function attribute, ignore
4502 // alloca alignments.
4503 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4504
4505 uint64_t StackAlign = TFI->getStackAlignment();
4506 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4507 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4508
4510 SDValue NeededSpace = Size;
4511
4512 // Get a reference to the stack pointer.
4513 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4514
4515 // If we need a backchain, save it now.
4516 SDValue Backchain;
4517 if (StoreBackchain)
4518 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4519 MachinePointerInfo());
4520
4521 // Add extra space for alignment if needed.
4522 if (ExtraAlignSpace)
4523 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4524 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4525
4526 // Get the new stack pointer value.
4527 SDValue NewSP;
4528 if (hasInlineStackProbe(MF)) {
4529 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4530 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4531 Chain = NewSP.getValue(1);
4532 }
4533 else {
4534 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4535 // Copy the new stack pointer back.
4536 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4537 }
4538
4539 // The allocated data lives above the 160 bytes allocated for the standard
4540 // frame, plus any outgoing stack arguments. We don't know how much that
4541 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4542 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4543 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4544
4545 // Dynamically realign if needed.
4546 if (RequiredAlign > StackAlign) {
4547 Result =
4548 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4549 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4550 Result =
4551 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4552 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4553 }
4554
4555 if (StoreBackchain)
4556 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4557 MachinePointerInfo());
4558
4559 SDValue Ops[2] = { Result, Chain };
4560 return DAG.getMergeValues(Ops, DL);
4561}
4562
4563SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4564 SDValue Op, SelectionDAG &DAG) const {
4565 SDLoc DL(Op);
4566
4567 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4568}
4569
4570SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4571 SelectionDAG &DAG,
4572 unsigned Opcode) const {
4573 EVT VT = Op.getValueType();
4574 SDLoc DL(Op);
4575 SDValue Even, Odd;
4576
4577 // This custom expander is only used on z17 and later for 64-bit types.
4578 assert(!is32Bit(VT));
4579 assert(Subtarget.hasMiscellaneousExtensions2());
4580
4581 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4582 // the high result in the even register. Return the latter.
4583 lowerGR128Binary(DAG, DL, VT, Opcode,
4584 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4585 return Even;
4586}
4587
4588SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4589 SelectionDAG &DAG) const {
4590 EVT VT = Op.getValueType();
4591 SDLoc DL(Op);
4592 SDValue Ops[2];
4593 if (is32Bit(VT))
4594 // Just do a normal 64-bit multiplication and extract the results.
4595 // We define this so that it can be used for constant division.
4596 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4597 Op.getOperand(1), Ops[1], Ops[0]);
4598 else if (Subtarget.hasMiscellaneousExtensions2())
4599 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4600 // the high result in the even register. ISD::SMUL_LOHI is defined to
4601 // return the low half first, so the results are in reverse order.
4602 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4603 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4604 else {
4605 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4606 //
4607 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4608 //
4609 // but using the fact that the upper halves are either all zeros
4610 // or all ones:
4611 //
4612 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4613 //
4614 // and grouping the right terms together since they are quicker than the
4615 // multiplication:
4616 //
4617 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
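// This works because lh and rh are the sign-replications of ll and rl: each is
// either 0 or all ones, so (lh * rl) == -(lh & rl) and (ll * rh) == -(ll & rh)
// modulo 2^64.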
4618 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4619 SDValue LL = Op.getOperand(0);
4620 SDValue RL = Op.getOperand(1);
4621 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4622 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4623 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4624 // the high result in the even register. ISD::SMUL_LOHI is defined to
4625 // return the low half first, so the results are in reverse order.
4626 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4627 LL, RL, Ops[1], Ops[0]);
4628 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4629 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4630 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4631 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4632 }
4633 return DAG.getMergeValues(Ops, DL);
4634}
4635
4636SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4637 SelectionDAG &DAG) const {
4638 EVT VT = Op.getValueType();
4639 SDLoc DL(Op);
4640 SDValue Ops[2];
4641 if (is32Bit(VT))
4642 // Just do a normal 64-bit multiplication and extract the results.
4643 // We define this so that it can be used for constant division.
4644 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4645 Op.getOperand(1), Ops[1], Ops[0]);
4646 else
4647 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4648 // the high result in the even register. ISD::UMUL_LOHI is defined to
4649 // return the low half first, so the results are in reverse order.
4650 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4651 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4652 return DAG.getMergeValues(Ops, DL);
4653}
4654
4655SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4656 SelectionDAG &DAG) const {
4657 SDValue Op0 = Op.getOperand(0);
4658 SDValue Op1 = Op.getOperand(1);
4659 EVT VT = Op.getValueType();
4660 SDLoc DL(Op);
4661
4662 // We use DSGF for 32-bit division. This means the first operand must
4663 // always be 64-bit, and the second operand should be 32-bit whenever
4664 // that is possible, to improve performance.
4665 if (is32Bit(VT))
4666 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4667 else if (DAG.ComputeNumSignBits(Op1) > 32)
4668 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4669
4670 // DSG(F) returns the remainder in the even register and the
4671 // quotient in the odd register.
4672 SDValue Ops[2];
4673 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4674 return DAG.getMergeValues(Ops, DL);
4675}
4676
4677SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4678 SelectionDAG &DAG) const {
4679 EVT VT = Op.getValueType();
4680 SDLoc DL(Op);
4681
4682 // DL(G) returns the remainder in the even register and the
4683 // quotient in the odd register.
4684 SDValue Ops[2];
4685 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4686 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4687 return DAG.getMergeValues(Ops, DL);
4688}
4689
4690SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4691 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4692
4693 // Get the known-zero masks for each operand.
4694 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4695 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4696 DAG.computeKnownBits(Ops[1])};
4697
4698 // See if the upper 32 bits of one operand and the lower 32 bits of the
4699 // other are known zero. They are the low and high operands respectively.
4700 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4701 Known[1].Zero.getZExtValue() };
4702 unsigned High, Low;
4703 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4704 High = 1, Low = 0;
4705 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4706 High = 0, Low = 1;
4707 else
4708 return Op;
4709
4710 SDValue LowOp = Ops[Low];
4711 SDValue HighOp = Ops[High];
4712
4713 // If the high part is a constant, we're better off using IILH.
4714 if (HighOp.getOpcode() == ISD::Constant)
4715 return Op;
4716
4717 // If the low part is a constant that is outside the range of LHI,
4718 // then we're better off using IILF.
4719 if (LowOp.getOpcode() == ISD::Constant) {
4720 int64_t Value = int32_t(LowOp->getAsZExtVal());
4721 if (!isInt<16>(Value))
4722 return Op;
4723 }
4724
4725 // Check whether the high part is an AND that doesn't change the
4726 // high 32 bits and just masks out low bits. We can skip it if so.
4727 if (HighOp.getOpcode() == ISD::AND &&
4728 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4729 SDValue HighOp0 = HighOp.getOperand(0);
4730 uint64_t Mask = HighOp.getConstantOperandVal(1);
4731 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4732 HighOp = HighOp0;
4733 }
4734
4735 // Take advantage of the fact that all GR32 operations only change the
4736 // low 32 bits by truncating Low to an i32 and inserting it directly
4737 // using a subreg. The interesting cases are those where the truncation
4738 // can be folded.
4739 SDLoc DL(Op);
4740 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4741 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4742 MVT::i64, HighOp, Low32);
4743}
4744
4745// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4746SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4747 SelectionDAG &DAG) const {
4748 SDNode *N = Op.getNode();
4749 SDValue LHS = N->getOperand(0);
4750 SDValue RHS = N->getOperand(1);
4751 SDLoc DL(N);
4752
4753 if (N->getValueType(0) == MVT::i128) {
4754 unsigned BaseOp = 0;
4755 unsigned FlagOp = 0;
4756 bool IsBorrow = false;
4757 switch (Op.getOpcode()) {
4758 default: llvm_unreachable("Unknown instruction!");
4759 case ISD::UADDO:
4760 BaseOp = ISD::ADD;
4761 FlagOp = SystemZISD::VACC;
4762 break;
4763 case ISD::USUBO:
4764 BaseOp = ISD::SUB;
4765 FlagOp = SystemZISD::VSCBI;
4766 IsBorrow = true;
4767 break;
4768 }
4769 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4770 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4771 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4772 DAG.getValueType(MVT::i1));
4773 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
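// VSCBI produces a carry-style indication (1 when no borrow occurred), so the
// flag is inverted below to match the borrow semantics of ISD::USUBO.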
4774 if (IsBorrow)
4775 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4776 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4777 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4778 }
4779
4780 unsigned BaseOp = 0;
4781 unsigned CCValid = 0;
4782 unsigned CCMask = 0;
4783
4784 switch (Op.getOpcode()) {
4785 default: llvm_unreachable("Unknown instruction!");
4786 case ISD::SADDO:
4787 BaseOp = SystemZISD::SADDO;
4788 CCValid = SystemZ::CCMASK_ARITH;
4789 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4790 break;
4791 case ISD::SSUBO:
4792 BaseOp = SystemZISD::SSUBO;
4793 CCValid = SystemZ::CCMASK_ARITH;
4794 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4795 break;
4796 case ISD::UADDO:
4797 BaseOp = SystemZISD::UADDO;
4798 CCValid = SystemZ::CCMASK_LOGICAL;
4799 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4800 break;
4801 case ISD::USUBO:
4802 BaseOp = SystemZISD::USUBO;
4803 CCValid = SystemZ::CCMASK_LOGICAL;
4804 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4805 break;
4806 }
4807
4808 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4809 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4810
4811 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4812 if (N->getValueType(1) == MVT::i1)
4813 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4814
4815 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4816}
4817
4818static bool isAddCarryChain(SDValue Carry) {
4819 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4820 Carry->getValueType(0) != MVT::i128)
4821 Carry = Carry.getOperand(2);
4822 return Carry.getOpcode() == ISD::UADDO &&
4823 Carry->getValueType(0) != MVT::i128;
4824}
4825
4826static bool isSubBorrowChain(SDValue Carry) {
4827 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4828 Carry->getValueType(0) != MVT::i128)
4829 Carry = Carry.getOperand(2);
4830 return Carry.getOpcode() == ISD::USUBO &&
4831 Carry->getValueType(0) != MVT::i128;
4832}
4833
4834// Lower UADDO_CARRY/USUBO_CARRY nodes.
4835SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4836 SelectionDAG &DAG) const {
4837
4838 SDNode *N = Op.getNode();
4839 MVT VT = N->getSimpleValueType(0);
4840
4841 // Let legalize expand this if it isn't a legal type yet.
4842 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4843 return SDValue();
4844
4845 SDValue LHS = N->getOperand(0);
4846 SDValue RHS = N->getOperand(1);
4847 SDValue Carry = Op.getOperand(2);
4848 SDLoc DL(N);
4849
4850 if (VT == MVT::i128) {
4851 unsigned BaseOp = 0;
4852 unsigned FlagOp = 0;
4853 bool IsBorrow = false;
4854 switch (Op.getOpcode()) {
4855 default: llvm_unreachable("Unknown instruction!");
4856 case ISD::UADDO_CARRY:
4857 BaseOp = SystemZISD::VAC;
4858 FlagOp = SystemZISD::VACCC;
4859 break;
4860 case ISD::USUBO_CARRY:
4861 BaseOp = SystemZISD::VSBI;
4862 FlagOp = SystemZISD::VSBCBI;
4863 IsBorrow = true;
4864 break;
4865 }
4866 if (IsBorrow)
4867 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4868 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4869 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4870 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4871 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4872 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4873 DAG.getValueType(MVT::i1));
4874 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4875 if (IsBorrow)
4876 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4877 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4878 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4879 }
4880
4881 unsigned BaseOp = 0;
4882 unsigned CCValid = 0;
4883 unsigned CCMask = 0;
4884
4885 switch (Op.getOpcode()) {
4886 default: llvm_unreachable("Unknown instruction!");
4887 case ISD::UADDO_CARRY:
4888 if (!isAddCarryChain(Carry))
4889 return SDValue();
4890
4891 BaseOp = SystemZISD::ADDCARRY;
4892 CCValid = SystemZ::CCMASK_LOGICAL;
4893 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4894 break;
4895 case ISD::USUBO_CARRY:
4896 if (!isSubBorrowChain(Carry))
4897 return SDValue();
4898
4899 BaseOp = SystemZISD::SUBCARRY;
4900 CCValid = SystemZ::CCMASK_LOGICAL;
4901 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4902 break;
4903 }
4904
4905 // Set the condition code from the carry flag.
4906 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4907 DAG.getConstant(CCValid, DL, MVT::i32),
4908 DAG.getConstant(CCMask, DL, MVT::i32));
4909
4910 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4911 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4912
4913 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4914 if (N->getValueType(1) == MVT::i1)
4915 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4916
4917 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4918}
4919
4920SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4921 SelectionDAG &DAG) const {
4922 EVT VT = Op.getValueType();
4923 SDLoc DL(Op);
4924 Op = Op.getOperand(0);
4925
4926 if (VT.getScalarSizeInBits() == 128) {
4927 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4928 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4929 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4930 DAG.getConstant(0, DL, MVT::i64));
4931 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4932 return Op;
4933 }
4934
4935 // Handle vector types via VPOPCT.
4936 if (VT.isVector()) {
4937 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4938 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4939 switch (VT.getScalarSizeInBits()) {
4940 case 8:
4941 break;
4942 case 16: {
4943 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4944 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4945 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4946 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4947 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4948 break;
4949 }
4950 case 32: {
4951 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4952 DAG.getConstant(0, DL, MVT::i32));
4953 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4954 break;
4955 }
4956 case 64: {
4957 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4958 DAG.getConstant(0, DL, MVT::i32));
4959 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4960 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4961 break;
4962 }
4963 default:
4964 llvm_unreachable("Unexpected type");
4965 }
4966 return Op;
4967 }
4968
4969 // Get the known-zero mask for the operand.
4970 KnownBits Known = DAG.computeKnownBits(Op);
4971 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4972 if (NumSignificantBits == 0)
4973 return DAG.getConstant(0, DL, VT);
4974
4975 // Skip known-zero high parts of the operand.
4976 int64_t OrigBitSize = VT.getSizeInBits();
4977 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4978 BitSize = std::min(BitSize, OrigBitSize);
4979
4980 // The POPCNT instruction counts the number of bits in each byte.
4981 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4982 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4983 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4984
4985 // Add up per-byte counts in a binary tree. All bits of Op at
4986 // position larger than BitSize remain zero throughout.
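// For example, with BitSize == 64 the loop shifts by 32, 16 and 8, after which
// the most significant byte holds the total population count.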
4987 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4988 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4989 if (BitSize != OrigBitSize)
4990 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4991 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4992 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4993 }
4994
4995 // Extract overall result from high byte.
4996 if (BitSize > 8)
4997 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4998 DAG.getConstant(BitSize - 8, DL, VT));
4999
5000 return Op;
5001}
5002
5003SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5004 SelectionDAG &DAG) const {
5005 SDLoc DL(Op);
5006 AtomicOrdering FenceOrdering =
5007 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5008 SyncScope::ID FenceSSID =
5009 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5010
5011 // The only fence that needs an instruction is a sequentially-consistent
5012 // cross-thread fence.
5013 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5014 FenceSSID == SyncScope::System) {
5015 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5016 Op.getOperand(0)),
5017 0);
5018 }
5019
5020 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5021 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5022}
5023
5024SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5025 SelectionDAG &DAG) const {
5026 EVT RegVT = Op.getValueType();
5027 if (RegVT.getSizeInBits() == 128)
5028 return lowerATOMIC_LDST_I128(Op, DAG);
5029 return lowerLoadF16(Op, DAG);
5030}
5031
5032SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5033 SelectionDAG &DAG) const {
5034 auto *Node = cast<AtomicSDNode>(Op.getNode());
5035 if (Node->getMemoryVT().getSizeInBits() == 128)
5036 return lowerATOMIC_LDST_I128(Op, DAG);
5037 return lowerStoreF16(Op, DAG);
5038}
5039
5040SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5041 SelectionDAG &DAG) const {
5042 auto *Node = cast<AtomicSDNode>(Op.getNode());
5043 assert(
5044 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5045 "Only custom lowering i128 or f128.");
5046 // Use same code to handle both legal and non-legal i128 types.
5047 SmallVector<SDValue, 2> Results;
5048 LowerOperationWrapper(Node, Results, DAG);
5049 return DAG.getMergeValues(Results, SDLoc(Op));
5050}
5051
5052// Prepare for a Compare And Swap for a subword operation. This needs to be
5053// done in memory with 4 bytes at natural alignment.
5054static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
5055 SDValue &AlignedAddr, SDValue &BitShift,
5056 SDValue &NegBitShift) {
5057 EVT PtrVT = Addr.getValueType();
5058 EVT WideVT = MVT::i32;
5059
5060 // Get the address of the containing word.
5061 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5062 DAG.getSignedConstant(-4, DL, PtrVT));
5063
5064 // Get the number of bits that the word must be rotated left in order
5065 // to bring the field to the top bits of a GR32.
5066 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5067 DAG.getConstant(3, DL, PtrVT));
5068 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5069
5070 // Get the complementing shift amount, for rotating a field in the top
5071 // bits back to its proper position.
5072 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5073 DAG.getConstant(0, DL, WideVT), BitShift);
5074
5075}
5076
5077// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5078// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
5079SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5080 SelectionDAG &DAG,
5081 unsigned Opcode) const {
5082 auto *Node = cast<AtomicSDNode>(Op.getNode());
5083
5084 // 32-bit operations need no special handling.
5085 EVT NarrowVT = Node->getMemoryVT();
5086 EVT WideVT = MVT::i32;
5087 if (NarrowVT == WideVT)
5088 return Op;
5089
5090 int64_t BitSize = NarrowVT.getSizeInBits();
5091 SDValue ChainIn = Node->getChain();
5092 SDValue Addr = Node->getBasePtr();
5093 SDValue Src2 = Node->getVal();
5094 MachineMemOperand *MMO = Node->getMemOperand();
5095 SDLoc DL(Node);
5096
5097 // Convert atomic subtracts of constants into additions.
5098 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5099 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5100 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5101 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5102 Src2.getValueType());
5103 }
5104
5105 SDValue AlignedAddr, BitShift, NegBitShift;
5106 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5107
5108 // Extend the source operand to 32 bits and prepare it for the inner loop.
5109 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5110 // operations require the source to be shifted in advance. (This shift
5111 // can be folded if the source is constant.) For AND and NAND, the lower
5112 // bits must be set, while for other opcodes they should be left clear.
5113 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5114 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5115 DAG.getConstant(32 - BitSize, DL, WideVT));
5116 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5117 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5118 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5119 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
5120
5121 // Construct the ATOMIC_LOADW_* node.
5122 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5123 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5124 DAG.getConstant(BitSize, DL, WideVT) };
5125 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5126 NarrowVT, MMO);
5127
5128 // Rotate the result of the final CS so that the field is in the lower
5129 // bits of a GR32, then truncate it.
5130 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5131 DAG.getConstant(BitSize, DL, WideVT));
5132 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5133
5134 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5135 return DAG.getMergeValues(RetOps, DL);
5136}
5137
5138// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5139// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5140SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5141 SelectionDAG &DAG) const {
5142 auto *Node = cast<AtomicSDNode>(Op.getNode());
5143 EVT MemVT = Node->getMemoryVT();
5144 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5145 // A full-width operation: negate and use LAA(G).
5146 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5147 assert(Subtarget.hasInterlockedAccess1() &&
5148 "Should have been expanded by AtomicExpand pass.");
5149 SDValue Src2 = Node->getVal();
5150 SDLoc DL(Src2);
5151 SDValue NegSrc2 =
5152 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5153 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5154 Node->getChain(), Node->getBasePtr(), NegSrc2,
5155 Node->getMemOperand());
5156 }
5157
5158 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5159}
5160
5161// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5162SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5163 SelectionDAG &DAG) const {
5164 auto *Node = cast<AtomicSDNode>(Op.getNode());
5165 SDValue ChainIn = Node->getOperand(0);
5166 SDValue Addr = Node->getOperand(1);
5167 SDValue CmpVal = Node->getOperand(2);
5168 SDValue SwapVal = Node->getOperand(3);
5169 MachineMemOperand *MMO = Node->getMemOperand();
5170 SDLoc DL(Node);
5171
5172 if (Node->getMemoryVT() == MVT::i128) {
5173 // Use same code to handle both legal and non-legal i128 types.
5174 SmallVector<SDValue, 3> Results;
5175 LowerOperationWrapper(Node, Results, DAG);
5176 return DAG.getMergeValues(Results, DL);
5177 }
5178
5179 // We have native support for 32-bit and 64-bit compare and swap, but we
5180 // still need to expand extracting the "success" result from the CC.
5181 EVT NarrowVT = Node->getMemoryVT();
5182 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5183 if (NarrowVT == WideVT) {
5184 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5185 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5186 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5187 DL, Tys, Ops, NarrowVT, MMO);
5188 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5189 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5190
5191 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5192 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5193 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5194 return SDValue();
5195 }
5196
5197 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5198 // via a fullword ATOMIC_CMP_SWAPW operation.
5199 int64_t BitSize = NarrowVT.getSizeInBits();
5200
5201 SDValue AlignedAddr, BitShift, NegBitShift;
5202 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5203
5204 // Construct the ATOMIC_CMP_SWAPW node.
5205 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5206 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5207 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5208 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5209 VTList, Ops, NarrowVT, MMO);
5210 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5211 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
5212
5213 // emitAtomicCmpSwapW() will zero extend the result (original value).
5214 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5215 DAG.getValueType(NarrowVT));
5216 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5217 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5218 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5219 return SDValue();
5220}
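// Standalone sketch of the ATOMIC_CMP_SWAPW idea used above for 8- and 16-bit
// compare-and-swap: operate on the containing aligned word and isolate the
// subword with shifts and masks.  Hypothetical helper in plain C++; the real
// lowering derives BitShift/NegBitShift from the address via
// getCSAddressAndShifts and expands the retry loop later.
#include <atomic>
#include <cstdint>

bool cas8ViaCas32(std::atomic<uint32_t> &Word, unsigned BitShift,
                  uint8_t Cmp, uint8_t Swap, uint8_t &Orig) {
  uint32_t Old = Word.load();
  for (;;) {
    Orig = uint8_t(Old >> BitShift);          // extract the addressed byte
    if (Orig != Cmp)
      return false;                           // compare failed, nothing stored
    uint32_t New =
        (Old & ~(uint32_t(0xFF) << BitShift)) | (uint32_t(Swap) << BitShift);
    if (Word.compare_exchange_weak(Old, New))
      return true;                            // swap succeeded
    // compare_exchange_weak reloaded Old on failure; retry with the new value.
  }
}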
5221
5222 MachineMemOperand::Flags
5223 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5224 // Because of how we convert atomic_load and atomic_store to normal loads and
5225 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5226 // since DAGCombine hasn't been updated to account for atomic but non-volatile
5227 // loads. (See D57601)
5228 if (auto *SI = dyn_cast<StoreInst>(&I))
5229 if (SI->isAtomic())
5230 return MachineMemOperand::MOVolatile;
5231 if (auto *LI = dyn_cast<LoadInst>(&I))
5232 if (LI->isAtomic())
5233 return MachineMemOperand::MOVolatile;
5234 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5235 if (AI->isAtomic())
5236 return MachineMemOperand::MOVolatile;
5237 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5238 if (AI->isAtomic())
5239 return MachineMemOperand::MOVolatile;
5240 return MachineMemOperand::MONone;
5241}
5242
5243SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5244 SelectionDAG &DAG) const {
5245 MachineFunction &MF = DAG.getMachineFunction();
5246 auto *Regs = Subtarget.getSpecialRegisters();
5247 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5248 report_fatal_error("Variable-sized stack allocations are not supported "
5249 "in GHC calling convention");
5250 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5251 Regs->getStackPointerRegister(), Op.getValueType());
5252}
5253
5254SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5255 SelectionDAG &DAG) const {
5256 MachineFunction &MF = DAG.getMachineFunction();
5257 auto *Regs = Subtarget.getSpecialRegisters();
5258 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5259
5260 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5261 report_fatal_error("Variable-sized stack allocations are not supported "
5262 "in GHC calling convention");
5263
5264 SDValue Chain = Op.getOperand(0);
5265 SDValue NewSP = Op.getOperand(1);
5266 SDValue Backchain;
5267 SDLoc DL(Op);
5268
5269 if (StoreBackchain) {
5270 SDValue OldSP = DAG.getCopyFromReg(
5271 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5272 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5273 MachinePointerInfo());
5274 }
5275
5276 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5277
5278 if (StoreBackchain)
5279 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5280 MachinePointerInfo());
5281
5282 return Chain;
5283}
5284
5285SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5286 SelectionDAG &DAG) const {
5287 bool IsData = Op.getConstantOperandVal(4);
5288 if (!IsData)
5289 // Just preserve the chain.
5290 return Op.getOperand(0);
5291
5292 SDLoc DL(Op);
5293 bool IsWrite = Op.getConstantOperandVal(2);
5294 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5295 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5296 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5297 Op.getOperand(1)};
5298 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5299 Node->getVTList(), Ops,
5300 Node->getMemoryVT(), Node->getMemOperand());
5301}
5302
5303// Convert condition code in CCReg to an i32 value.
5304 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
5305 SDLoc DL(CCReg);
5306 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
5307 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
5308 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
5309}
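// Worked example of what getCCResult computes: IPM places the condition code
// in bits 28-29 of the 32-bit result and zeroes the two bits above it, so a
// logical shift right by SystemZ::IPM_CC (28, per SystemZ.h) leaves CC as an
// integer 0-3.  Standalone sketch; the explicit mask below is redundant for
// real IPM output but keeps the example self-contained.
#include <cstdint>

unsigned ccFromIPM(uint32_t IPMResult) {
  return (IPMResult >> 28) & 3;
}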
5310
5311SDValue
5312SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5313 SelectionDAG &DAG) const {
5314 unsigned Opcode, CCValid;
5315 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5316 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5317 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5318 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5319 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5320 return SDValue();
5321 }
5322
5323 return SDValue();
5324}
5325
5326SDValue
5327SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5328 SelectionDAG &DAG) const {
5329 unsigned Opcode, CCValid;
5330 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5331 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5332 if (Op->getNumValues() == 1)
5333 return getCCResult(DAG, SDValue(Node, 0));
5334 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5335 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5336 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5337 }
5338
5339 unsigned Id = Op.getConstantOperandVal(0);
5340 switch (Id) {
5341 case Intrinsic::thread_pointer:
5342 return lowerThreadPointer(SDLoc(Op), DAG);
5343
5344 case Intrinsic::s390_vpdi:
5345 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5346 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5347
5348 case Intrinsic::s390_vperm:
5349 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5350 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5351
5352 case Intrinsic::s390_vuphb:
5353 case Intrinsic::s390_vuphh:
5354 case Intrinsic::s390_vuphf:
5355 case Intrinsic::s390_vuphg:
5356 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5357 Op.getOperand(1));
5358
5359 case Intrinsic::s390_vuplhb:
5360 case Intrinsic::s390_vuplhh:
5361 case Intrinsic::s390_vuplhf:
5362 case Intrinsic::s390_vuplhg:
5363 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5364 Op.getOperand(1));
5365
5366 case Intrinsic::s390_vuplb:
5367 case Intrinsic::s390_vuplhw:
5368 case Intrinsic::s390_vuplf:
5369 case Intrinsic::s390_vuplg:
5370 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5371 Op.getOperand(1));
5372
5373 case Intrinsic::s390_vupllb:
5374 case Intrinsic::s390_vupllh:
5375 case Intrinsic::s390_vupllf:
5376 case Intrinsic::s390_vupllg:
5377 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5378 Op.getOperand(1));
5379
5380 case Intrinsic::s390_vsumb:
5381 case Intrinsic::s390_vsumh:
5382 case Intrinsic::s390_vsumgh:
5383 case Intrinsic::s390_vsumgf:
5384 case Intrinsic::s390_vsumqf:
5385 case Intrinsic::s390_vsumqg:
5386 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5387 Op.getOperand(1), Op.getOperand(2));
5388
5389 case Intrinsic::s390_vaq:
5390 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5391 Op.getOperand(1), Op.getOperand(2));
5392 case Intrinsic::s390_vaccb:
5393 case Intrinsic::s390_vacch:
5394 case Intrinsic::s390_vaccf:
5395 case Intrinsic::s390_vaccg:
5396 case Intrinsic::s390_vaccq:
5397 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5398 Op.getOperand(1), Op.getOperand(2));
5399 case Intrinsic::s390_vacq:
5400 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5401 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5402 case Intrinsic::s390_vacccq:
5403 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5404 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5405
5406 case Intrinsic::s390_vsq:
5407 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5408 Op.getOperand(1), Op.getOperand(2));
5409 case Intrinsic::s390_vscbib:
5410 case Intrinsic::s390_vscbih:
5411 case Intrinsic::s390_vscbif:
5412 case Intrinsic::s390_vscbig:
5413 case Intrinsic::s390_vscbiq:
5414 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5415 Op.getOperand(1), Op.getOperand(2));
5416 case Intrinsic::s390_vsbiq:
5417 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5418 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5419 case Intrinsic::s390_vsbcbiq:
5420 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5421 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5422
5423 case Intrinsic::s390_vmhb:
5424 case Intrinsic::s390_vmhh:
5425 case Intrinsic::s390_vmhf:
5426 case Intrinsic::s390_vmhg:
5427 case Intrinsic::s390_vmhq:
5428 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5429 Op.getOperand(1), Op.getOperand(2));
5430 case Intrinsic::s390_vmlhb:
5431 case Intrinsic::s390_vmlhh:
5432 case Intrinsic::s390_vmlhf:
5433 case Intrinsic::s390_vmlhg:
5434 case Intrinsic::s390_vmlhq:
5435 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5436 Op.getOperand(1), Op.getOperand(2));
5437
5438 case Intrinsic::s390_vmahb:
5439 case Intrinsic::s390_vmahh:
5440 case Intrinsic::s390_vmahf:
5441 case Intrinsic::s390_vmahg:
5442 case Intrinsic::s390_vmahq:
5443 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5444 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5445 case Intrinsic::s390_vmalhb:
5446 case Intrinsic::s390_vmalhh:
5447 case Intrinsic::s390_vmalhf:
5448 case Intrinsic::s390_vmalhg:
5449 case Intrinsic::s390_vmalhq:
5450 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5451 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5452
5453 case Intrinsic::s390_vmeb:
5454 case Intrinsic::s390_vmeh:
5455 case Intrinsic::s390_vmef:
5456 case Intrinsic::s390_vmeg:
5457 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5458 Op.getOperand(1), Op.getOperand(2));
5459 case Intrinsic::s390_vmleb:
5460 case Intrinsic::s390_vmleh:
5461 case Intrinsic::s390_vmlef:
5462 case Intrinsic::s390_vmleg:
5463 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5464 Op.getOperand(1), Op.getOperand(2));
5465 case Intrinsic::s390_vmob:
5466 case Intrinsic::s390_vmoh:
5467 case Intrinsic::s390_vmof:
5468 case Intrinsic::s390_vmog:
5469 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5470 Op.getOperand(1), Op.getOperand(2));
5471 case Intrinsic::s390_vmlob:
5472 case Intrinsic::s390_vmloh:
5473 case Intrinsic::s390_vmlof:
5474 case Intrinsic::s390_vmlog:
5475 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5476 Op.getOperand(1), Op.getOperand(2));
5477
5478 case Intrinsic::s390_vmaeb:
5479 case Intrinsic::s390_vmaeh:
5480 case Intrinsic::s390_vmaef:
5481 case Intrinsic::s390_vmaeg:
5482 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5483 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5484 Op.getOperand(1), Op.getOperand(2)),
5485 Op.getOperand(3));
5486 case Intrinsic::s390_vmaleb:
5487 case Intrinsic::s390_vmaleh:
5488 case Intrinsic::s390_vmalef:
5489 case Intrinsic::s390_vmaleg:
5490 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5491 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5492 Op.getOperand(1), Op.getOperand(2)),
5493 Op.getOperand(3));
5494 case Intrinsic::s390_vmaob:
5495 case Intrinsic::s390_vmaoh:
5496 case Intrinsic::s390_vmaof:
5497 case Intrinsic::s390_vmaog:
5498 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5499 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5500 Op.getOperand(1), Op.getOperand(2)),
5501 Op.getOperand(3));
5502 case Intrinsic::s390_vmalob:
5503 case Intrinsic::s390_vmaloh:
5504 case Intrinsic::s390_vmalof:
5505 case Intrinsic::s390_vmalog:
5506 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5507 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5508 Op.getOperand(1), Op.getOperand(2)),
5509 Op.getOperand(3));
5510 }
5511
5512 return SDValue();
5513}
5514
5515namespace {
5516// Says that SystemZISD operation Opcode can be used to perform the equivalent
5517// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5518// Operand is the constant third operand, otherwise it is the number of
5519// bytes in each element of the result.
5520struct Permute {
5521 unsigned Opcode;
5522 unsigned Operand;
5523 unsigned char Bytes[SystemZ::VectorBytes];
5524};
5525}
5526
5527static const Permute PermuteForms[] = {
5528 // VMRHG
5529 { SystemZISD::MERGE_HIGH, 8,
5530 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5531 // VMRHF
5532 { SystemZISD::MERGE_HIGH, 4,
5533 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5534 // VMRHH
5535 { SystemZISD::MERGE_HIGH, 2,
5536 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5537 // VMRHB
5538 { SystemZISD::MERGE_HIGH, 1,
5539 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5540 // VMRLG
5541 { SystemZISD::MERGE_LOW, 8,
5542 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5543 // VMRLF
5544 { SystemZISD::MERGE_LOW, 4,
5545 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5546 // VMRLH
5547 { SystemZISD::MERGE_LOW, 2,
5548 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5549 // VMRLB
5550 { SystemZISD::MERGE_LOW, 1,
5551 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5552 // VPKG
5553 { SystemZISD::PACK, 4,
5554 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5555 // VPKF
5556 { SystemZISD::PACK, 2,
5557 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5558 // VPKH
5559 { SystemZISD::PACK, 1,
5560 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5561 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5562 { SystemZISD::PERMUTE_DWORDS, 4,
5563 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5564 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5565 { SystemZISD::PERMUTE_DWORDS, 1,
5566 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5567};
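// Standalone illustration of how the Bytes tables above are read: entry I
// selects byte I of the result from the 32-byte concatenation of the two
// operands (0-15 from operand 0, 16-31 from operand 1), exactly like VPERM.
#include <array>
#include <cstdint>

std::array<uint8_t, 16> applyPermute(const std::array<uint8_t, 16> &Op0,
                                     const std::array<uint8_t, 16> &Op1,
                                     const unsigned char (&Bytes)[16]) {
  std::array<uint8_t, 16> Result;
  for (unsigned I = 0; I < 16; ++I)
    Result[I] = Bytes[I] < 16 ? Op0[Bytes[I]] : Op1[Bytes[I] - 16];
  return Result;
}
// With the VMRHG selector {0..7, 16..23}, the result is bytes 0-7 of Op0
// followed by bytes 0-7 of Op1, i.e. the two high doublewords merged.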
5568
5569// Called after matching a vector shuffle against a particular pattern.
5570// Both the original shuffle and the pattern have two vector operands.
5571// OpNos[0] is the operand of the original shuffle that should be used for
5572// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5573// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5574// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5575// for operands 0 and 1 of the pattern.
5576static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5577 if (OpNos[0] < 0) {
5578 if (OpNos[1] < 0)
5579 return false;
5580 OpNo0 = OpNo1 = OpNos[1];
5581 } else if (OpNos[1] < 0) {
5582 OpNo0 = OpNo1 = OpNos[0];
5583 } else {
5584 OpNo0 = OpNos[0];
5585 OpNo1 = OpNos[1];
5586 }
5587 return true;
5588}
5589
5590// Bytes is a VPERM-like permute vector, except that -1 is used for
5591// undefined bytes. Return true if the VPERM can be implemented using P.
5592// When returning true set OpNo0 to the VPERM operand that should be
5593// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5594//
5595// For example, if swapping the VPERM operands allows P to match, OpNo0
5596// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5597// operand, but rewriting it to use two duplicated operands allows it to
5598// match P, then OpNo0 and OpNo1 will be the same.
5599static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5600 unsigned &OpNo0, unsigned &OpNo1) {
5601 int OpNos[] = { -1, -1 };
5602 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5603 int Elt = Bytes[I];
5604 if (Elt >= 0) {
5605 // Make sure that the two permute vectors use the same suboperand
5606 // byte number. Only the operand numbers (the high bits) are
5607 // allowed to differ.
5608 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5609 return false;
5610 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5611 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5612 // Make sure that the operand mappings are consistent with previous
5613 // elements.
5614 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5615 return false;
5616 OpNos[ModelOpNo] = RealOpNo;
5617 }
5618 }
5619 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5620}
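// Worked example of the operand resolution above: the byte vector
// {16..23, 0..7} uses the VMRHG pattern {0..7, 16..23} with the VPERM
// operands swapped, so matchPermute yields OpNo0 = 1 and OpNo1 = 0.
// Hypothetical standalone check over plain arrays.
#include <cassert>

void checkSwappedMergeHigh() {
  const int Pattern[16] = {0,  1,  2,  3,  4,  5,  6,  7,
                           16, 17, 18, 19, 20, 21, 22, 23};
  const int Bytes[16] = {16, 17, 18, 19, 20, 21, 22, 23,
                         0,  1,  2,  3,  4,  5,  6,  7};
  int OpNos[2] = {-1, -1};
  for (unsigned I = 0; I < 16; ++I) {
    assert(((Bytes[I] ^ Pattern[I]) & 15) == 0); // same byte within an operand
    OpNos[Pattern[I] / 16] = Bytes[I] / 16;      // record the operand mapping
  }
  assert(OpNos[0] == 1 && OpNos[1] == 0);        // operands come out swapped
}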
5621
5622// As above, but search for a matching permute.
5623static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5624 unsigned &OpNo0, unsigned &OpNo1) {
5625 for (auto &P : PermuteForms)
5626 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5627 return &P;
5628 return nullptr;
5629}
5630
5631// Bytes is a VPERM-like permute vector, except that -1 is used for
5632// undefined bytes. This permute is an operand of an outer permute.
5633// See whether redistributing the -1 bytes gives a shuffle that can be
5634// implemented using P. If so, set Transform to a VPERM-like permute vector
5635// that, when applied to the result of P, gives the original permute in Bytes.
5636 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5637 const Permute &P,
5638 SmallVectorImpl<int> &Transform) {
5639 unsigned To = 0;
5640 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5641 int Elt = Bytes[From];
5642 if (Elt < 0)
5643 // Byte number From of the result is undefined.
5644 Transform[From] = -1;
5645 else {
5646 while (P.Bytes[To] != Elt) {
5647 To += 1;
5648 if (To == SystemZ::VectorBytes)
5649 return false;
5650 }
5651 Transform[From] = To;
5652 }
5653 }
5654 return true;
5655}
5656
5657// As above, but search for a matching permute.
5658static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5659 SmallVectorImpl<int> &Transform) {
5660 for (auto &P : PermuteForms)
5661 if (matchDoublePermute(Bytes, P, Transform))
5662 return &P;
5663 return nullptr;
5664}
5665
5666// Convert the mask of the given shuffle op into a byte-level mask,
5667// as if it had type vNi8.
5668static bool getVPermMask(SDValue ShuffleOp,
5669 SmallVectorImpl<int> &Bytes) {
5670 EVT VT = ShuffleOp.getValueType();
5671 unsigned NumElements = VT.getVectorNumElements();
5672 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5673
5674 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5675 Bytes.resize(NumElements * BytesPerElement, -1);
5676 for (unsigned I = 0; I < NumElements; ++I) {
5677 int Index = VSN->getMaskElt(I);
5678 if (Index >= 0)
5679 for (unsigned J = 0; J < BytesPerElement; ++J)
5680 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5681 }
5682 return true;
5683 }
5684 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5685 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5686 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5687 Bytes.resize(NumElements * BytesPerElement, -1);
5688 for (unsigned I = 0; I < NumElements; ++I)
5689 for (unsigned J = 0; J < BytesPerElement; ++J)
5690 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5691 return true;
5692 }
5693 return false;
5694}
5695
5696// Bytes is a VPERM-like permute vector, except that -1 is used for
5697// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5698// the result come from a contiguous sequence of bytes from one input.
5699// Set Base to the selector for the first byte if so.
5700static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5701 unsigned BytesPerElement, int &Base) {
5702 Base = -1;
5703 for (unsigned I = 0; I < BytesPerElement; ++I) {
5704 if (Bytes[Start + I] >= 0) {
5705 unsigned Elem = Bytes[Start + I];
5706 if (Base < 0) {
5707 Base = Elem - I;
5708 // Make sure the bytes would come from one input operand.
5709 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5710 return false;
5711 } else if (unsigned(Base) != Elem - I)
5712 return false;
5713 }
5714 }
5715 return true;
5716}
5717
5718// Bytes is a VPERM-like permute vector, except that -1 is used for
5719// undefined bytes. Return true if it can be performed using VSLDB.
5720// When returning true, set StartIndex to the shift amount and OpNo0
5721// and OpNo1 to the VPERM operands that should be used as the first
5722// and second shift operand respectively.
5723 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5724 unsigned &StartIndex, unsigned &OpNo0,
5725 unsigned &OpNo1) {
5726 int OpNos[] = { -1, -1 };
5727 int Shift = -1;
5728 for (unsigned I = 0; I < 16; ++I) {
5729 int Index = Bytes[I];
5730 if (Index >= 0) {
5731 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5732 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5733 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5734 if (Shift < 0)
5735 Shift = ExpectedShift;
5736 else if (Shift != ExpectedShift)
5737 return false;
5738 // Make sure that the operand mappings are consistent with previous
5739 // elements.
5740 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5741 return false;
5742 OpNos[ModelOpNo] = RealOpNo;
5743 }
5744 }
5745 StartIndex = Shift;
5746 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5747}
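// Standalone sketch of the SHL_DOUBLE (VSLDB) form recognized above: with
// shift amount S, result byte I is byte S + I of the 32-byte concatenation of
// the two operands, so a matching byte vector must select byte (S + I) mod 16
// of one of them.
#include <array>
#include <cstdint>

std::array<uint8_t, 16> shlDouble(const std::array<uint8_t, 16> &Op0,
                                  const std::array<uint8_t, 16> &Op1,
                                  unsigned Shift) { // byte shift, 0..15
  std::array<uint8_t, 16> Result;
  for (unsigned I = 0; I < 16; ++I) {
    unsigned Src = Shift + I;
    Result[I] = Src < 16 ? Op0[Src] : Op1[Src - 16];
  }
  return Result;
}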
5748
5749// Create a node that performs P on operands Op0 and Op1, casting the
5750// operands to the appropriate type. The type of the result is determined by P.
5751 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5752 const Permute &P, SDValue Op0, SDValue Op1) {
5753 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5754 // elements of a PACK are twice as wide as the outputs.
5755 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5756 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5757 P.Operand);
5758 // Cast both operands to the appropriate type.
5759 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5760 SystemZ::VectorBytes / InBytes);
5761 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5762 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5763 SDValue Op;
5764 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5765 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5766 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5767 } else if (P.Opcode == SystemZISD::PACK) {
5768 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5769 SystemZ::VectorBytes / P.Operand);
5770 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5771 } else {
5772 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5773 }
5774 return Op;
5775}
5776
5777static bool isZeroVector(SDValue N) {
5778 if (N->getOpcode() == ISD::BITCAST)
5779 N = N->getOperand(0);
5780 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5781 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5782 return Op->getZExtValue() == 0;
5783 return ISD::isBuildVectorAllZeros(N.getNode());
5784}
5785
5786// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5787static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5788 for (unsigned I = 0; I < Num ; I++)
5789 if (isZeroVector(Ops[I]))
5790 return I;
5791 return UINT32_MAX;
5792}
5793
5794// Bytes is a VPERM-like permute vector, except that -1 is used for
5795// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5796// VSLDB or VPERM.
5797 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5798 SDValue *Ops,
5799 const SmallVectorImpl<int> &Bytes) {
5800 for (unsigned I = 0; I < 2; ++I)
5801 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5802
5803 // First see whether VSLDB can be used.
5804 unsigned StartIndex, OpNo0, OpNo1;
5805 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5806 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5807 Ops[OpNo1],
5808 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5809
5810 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5811 // eliminate a zero vector by reusing any zero index in the permute vector.
5812 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5813 if (ZeroVecIdx != UINT32_MAX) {
5814 bool MaskFirst = true;
5815 int ZeroIdx = -1;
5816 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5817 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5818 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5819 if (OpNo == ZeroVecIdx && I == 0) {
5820 // If the first byte is zero, use mask as first operand.
5821 ZeroIdx = 0;
5822 break;
5823 }
5824 if (OpNo != ZeroVecIdx && Byte == 0) {
5825 // If mask contains a zero, use it by placing that vector first.
5826 ZeroIdx = I + SystemZ::VectorBytes;
5827 MaskFirst = false;
5828 break;
5829 }
5830 }
5831 if (ZeroIdx != -1) {
5832 SDValue IndexNodes[SystemZ::VectorBytes];
5833 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5834 if (Bytes[I] >= 0) {
5835 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5836 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5837 if (OpNo == ZeroVecIdx)
5838 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5839 else {
5840 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5841 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5842 }
5843 } else
5844 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5845 }
5846 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5847 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5848 if (MaskFirst)
5849 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5850 Mask);
5851 else
5852 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5853 Mask);
5854 }
5855 }
5856
5857 SDValue IndexNodes[SystemZ::VectorBytes];
5858 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5859 if (Bytes[I] >= 0)
5860 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5861 else
5862 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5863 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5864 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5865 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5866}
5867
5868namespace {
5869// Describes a general N-operand vector shuffle.
5870struct GeneralShuffle {
5871 GeneralShuffle(EVT vt)
5872 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5873 void addUndef();
5874 bool add(SDValue, unsigned);
5875 SDValue getNode(SelectionDAG &, const SDLoc &);
5876 void tryPrepareForUnpack();
5877 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5878 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5879
5880 // The operands of the shuffle.
5881 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5882
5883 // Index I is -1 if byte I of the result is undefined. Otherwise the
5884 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5885 // Bytes[I] / SystemZ::VectorBytes.
5886 SmallVector<int, SystemZ::VectorBytes> Bytes;
5887
5888 // The type of the shuffle result.
5889 EVT VT;
5890
5891 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5892 unsigned UnpackFromEltSize;
5893 // True if the final unpack uses the low half.
5894 bool UnpackLow;
5895};
5896} // namespace
5897
5898// Add an extra undefined element to the shuffle.
5899void GeneralShuffle::addUndef() {
5900 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5901 for (unsigned I = 0; I < BytesPerElement; ++I)
5902 Bytes.push_back(-1);
5903}
5904
5905// Add an extra element to the shuffle, taking it from element Elem of Op.
5906// A null Op indicates a vector input whose value will be calculated later;
5907// there is at most one such input per shuffle and it always has the same
5908// type as the result. Aborts and returns false if the source vector elements
5909// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5910// LLVM they become implicitly extended, but this is rare and not optimized.
5911bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5912 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5913
5914 // The source vector can have wider elements than the result,
5915 // either through an explicit TRUNCATE or because of type legalization.
5916 // We want the least significant part.
5917 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5918 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5919
5920 // Return false if the source elements are smaller than their destination
5921 // elements.
5922 if (FromBytesPerElement < BytesPerElement)
5923 return false;
5924
5925 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5926 (FromBytesPerElement - BytesPerElement));
5927
5928 // Look through things like shuffles and bitcasts.
5929 while (Op.getNode()) {
5930 if (Op.getOpcode() == ISD::BITCAST)
5931 Op = Op.getOperand(0);
5932 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5933 // See whether the bytes we need come from a contiguous part of one
5934 // operand.
5935 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5936 if (!getVPermMask(Op, OpBytes))
5937 break;
5938 int NewByte;
5939 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5940 break;
5941 if (NewByte < 0) {
5942 addUndef();
5943 return true;
5944 }
5945 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5946 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5947 } else if (Op.isUndef()) {
5948 addUndef();
5949 return true;
5950 } else
5951 break;
5952 }
5953
5954 // Make sure that the source of the extraction is in Ops.
5955 unsigned OpNo = 0;
5956 for (; OpNo < Ops.size(); ++OpNo)
5957 if (Ops[OpNo] == Op)
5958 break;
5959 if (OpNo == Ops.size())
5960 Ops.push_back(Op);
5961
5962 // Add the element to Bytes.
5963 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5964 for (unsigned I = 0; I < BytesPerElement; ++I)
5965 Bytes.push_back(Base + I);
5966
5967 return true;
5968}
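// Worked example of the Bytes encoding documented in GeneralShuffle: entry I
// selects byte (Bytes[I] % 16) of operand (Bytes[I] / 16).  For a v8i16
// result whose element 3 comes from element 5 of Ops[2], bytes 6 and 7 of the
// result are encoded as 2*16+10 and 2*16+11.  Standalone check of that
// arithmetic only; add() additionally handles wider source elements.
#include <cassert>

void checkBytesEncoding() {
  unsigned OpNo = 2, Elem = 5, BytesPerElement = 2;
  unsigned Base = OpNo * 16 + Elem * BytesPerElement;
  assert(Base == 42 && Base / 16 == 2 && Base % 16 == 10);
}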
5969
5970// Return SDNodes for the completed shuffle.
5971SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5972 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5973
5974 if (Ops.size() == 0)
5975 return DAG.getUNDEF(VT);
5976
5977 // Use a single unpack if possible as the last operation.
5978 tryPrepareForUnpack();
5979
5980 // Make sure that there are at least two shuffle operands.
5981 if (Ops.size() == 1)
5982 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5983
5984 // Create a tree of shuffles, deferring root node until after the loop.
5985 // Try to redistribute the undefined elements of non-root nodes so that
5986 // the non-root shuffles match something like a pack or merge, then adjust
5987 // the parent node's permute vector to compensate for the new order.
5988 // Among other things, this copes with vectors like <2 x i16> that were
5989 // padded with undefined elements during type legalization.
5990 //
5991 // In the best case this redistribution will lead to the whole tree
5992 // using packs and merges. It should rarely be a loss in other cases.
5993 unsigned Stride = 1;
5994 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5995 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5996 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5997
5998 // Create a mask for just these two operands.
5999 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
6000 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6001 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6002 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6003 if (OpNo == I)
6004 NewBytes[J] = Byte;
6005 else if (OpNo == I + Stride)
6006 NewBytes[J] = SystemZ::VectorBytes + Byte;
6007 else
6008 NewBytes[J] = -1;
6009 }
6010 // See if it would be better to reorganize NewBytes to avoid using VPERM.
6011 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
6012 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6013 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6014 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6015 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6016 if (NewBytes[J] >= 0) {
6017 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6018 "Invalid double permute");
6019 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6020 } else
6021 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6022 }
6023 } else {
6024 // Just use NewBytes on the operands.
6025 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6026 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6027 if (NewBytes[J] >= 0)
6028 Bytes[J] = I * SystemZ::VectorBytes + J;
6029 }
6030 }
6031 }
6032
6033 // Now we just have 2 inputs. Put the second operand in Ops[1].
6034 if (Stride > 1) {
6035 Ops[1] = Ops[Stride];
6036 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6037 if (Bytes[I] >= int(SystemZ::VectorBytes))
6038 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6039 }
6040
6041 // Look for an instruction that can do the permute without resorting
6042 // to VPERM.
6043 unsigned OpNo0, OpNo1;
6044 SDValue Op;
6045 if (unpackWasPrepared() && Ops[1].isUndef())
6046 Op = Ops[0];
6047 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6048 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6049 else
6050 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6051
6052 Op = insertUnpackIfPrepared(DAG, DL, Op);
6053
6054 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6055}
6056
6057#ifndef NDEBUG
6058static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6059 dbgs() << Msg.c_str() << " { ";
6060 for (unsigned I = 0; I < Bytes.size(); I++)
6061 dbgs() << Bytes[I] << " ";
6062 dbgs() << "}\n";
6063}
6064#endif
6065
6066// If the Bytes vector matches an unpack operation, prepare to do the unpack
6067// after all else by removing the zero vector and the effect of the unpack on
6068// Bytes.
6069void GeneralShuffle::tryPrepareForUnpack() {
6070 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6071 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6072 return;
6073
6074 // Only do this if removing the zero vector reduces the depth, otherwise
6075 // the critical path will increase with the final unpack.
6076 if (Ops.size() > 2 &&
6077 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6078 return;
6079
6080 // Find an unpack that would allow removing the zero vector from Ops.
6081 UnpackFromEltSize = 1;
6082 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6083 bool MatchUnpack = true;
6084 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
6085 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6086 unsigned ToEltSize = UnpackFromEltSize * 2;
6087 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6088 if (!IsZextByte)
6089 SrcBytes.push_back(Bytes[Elt]);
6090 if (Bytes[Elt] != -1) {
6091 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6092 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6093 MatchUnpack = false;
6094 break;
6095 }
6096 }
6097 }
6098 if (MatchUnpack) {
6099 if (Ops.size() == 2) {
6100 // Don't use unpack if a single source operand needs rearrangement.
6101 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6102 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6103 if (SrcBytes[i] == -1)
6104 continue;
6105 if (SrcBytes[i] % 16 != int(i))
6106 CanUseUnpackHigh = false;
6107 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6108 CanUseUnpackLow = false;
6109 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6110 UnpackFromEltSize = UINT_MAX;
6111 return;
6112 }
6113 }
6114 if (!CanUseUnpackHigh)
6115 UnpackLow = true;
6116 }
6117 break;
6118 }
6119 }
6120 if (UnpackFromEltSize > 4)
6121 return;
6122
6123 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6124 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6125 << ".\n";
6126 dumpBytes(Bytes, "Original Bytes vector:"););
6127
6128 // Apply the unpack in reverse to the Bytes array.
6129 unsigned B = 0;
6130 if (UnpackLow) {
6131 while (B < SystemZ::VectorBytes / 2)
6132 Bytes[B++] = -1;
6133 }
6134 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6135 Elt += UnpackFromEltSize;
6136 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6137 Bytes[B] = Bytes[Elt];
6138 }
6139 if (!UnpackLow) {
6140 while (B < SystemZ::VectorBytes)
6141 Bytes[B++] = -1;
6142 }
6143
6144 // Remove the zero vector from Ops
6145 Ops.erase(&Ops[ZeroVecOpNo]);
6146 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6147 if (Bytes[I] >= 0) {
6148 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6149 if (OpNo > ZeroVecOpNo)
6150 Bytes[I] -= SystemZ::VectorBytes;
6151 }
6152
6153 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6154 dbgs() << "\n";);
6155}
6156
6157SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6158 const SDLoc &DL,
6159 SDValue Op) {
6160 if (!unpackWasPrepared())
6161 return Op;
6162 unsigned InBits = UnpackFromEltSize * 8;
6163 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6164 SystemZ::VectorBits / InBits);
6165 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6166 unsigned OutBits = InBits * 2;
6167 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6168 SystemZ::VectorBits / OutBits);
6169 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6170 : SystemZISD::UNPACK_HIGH,
6171 DL, OutVT, PackedOp);
6172}
6173
6174// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6175 static bool isScalarToVector(SDValue Op) {
6176 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6177 if (!Op.getOperand(I).isUndef())
6178 return false;
6179 return true;
6180}
6181
6182// Return a vector of type VT that contains Value in the first element.
6183// The other elements don't matter.
6184 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6185 SDValue Value) {
6186 // If we have a constant, replicate it to all elements and let the
6187 // BUILD_VECTOR lowering take care of it.
6188 if (Value.getOpcode() == ISD::Constant ||
6189 Value.getOpcode() == ISD::ConstantFP) {
6190 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
6191 return DAG.getBuildVector(VT, DL, Ops);
6192 }
6193 if (Value.isUndef())
6194 return DAG.getUNDEF(VT);
6195 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6196}
6197
6198// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6199// element 1. Used for cases in which replication is cheap.
6200 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6201 SDValue Op0, SDValue Op1) {
6202 if (Op0.isUndef()) {
6203 if (Op1.isUndef())
6204 return DAG.getUNDEF(VT);
6205 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6206 }
6207 if (Op1.isUndef())
6208 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6209 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6210 buildScalarToVector(DAG, DL, VT, Op0),
6211 buildScalarToVector(DAG, DL, VT, Op1));
6212}
6213
6214// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6215// vector for them.
6216 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
6217 SDValue Op1) {
6218 if (Op0.isUndef() && Op1.isUndef())
6219 return DAG.getUNDEF(MVT::v2i64);
6220 // If one of the two inputs is undefined then replicate the other one,
6221 // in order to avoid using another register unnecessarily.
6222 if (Op0.isUndef())
6223 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6224 else if (Op1.isUndef())
6225 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6226 else {
6227 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6228 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6229 }
6230 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6231}
6232
6233// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6234// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6235// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6236// would benefit from this representation and return it if so.
6237 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
6238 BuildVectorSDNode *BVN) {
6239 EVT VT = BVN->getValueType(0);
6240 unsigned NumElements = VT.getVectorNumElements();
6241
6242 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6243 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6244 // need a BUILD_VECTOR, add an additional placeholder operand for that
6245 // BUILD_VECTOR and store its operands in ResidueOps.
6246 GeneralShuffle GS(VT);
6247 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
6248 bool FoundOne = false;
6249 for (unsigned I = 0; I < NumElements; ++I) {
6250 SDValue Op = BVN->getOperand(I);
6251 if (Op.getOpcode() == ISD::TRUNCATE)
6252 Op = Op.getOperand(0);
6253 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6254 Op.getOperand(1).getOpcode() == ISD::Constant) {
6255 unsigned Elem = Op.getConstantOperandVal(1);
6256 if (!GS.add(Op.getOperand(0), Elem))
6257 return SDValue();
6258 FoundOne = true;
6259 } else if (Op.isUndef()) {
6260 GS.addUndef();
6261 } else {
6262 if (!GS.add(SDValue(), ResidueOps.size()))
6263 return SDValue();
6264 ResidueOps.push_back(BVN->getOperand(I));
6265 }
6266 }
6267
6268 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6269 if (!FoundOne)
6270 return SDValue();
6271
6272 // Create the BUILD_VECTOR for the remaining elements, if any.
6273 if (!ResidueOps.empty()) {
6274 while (ResidueOps.size() < NumElements)
6275 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6276 for (auto &Op : GS.Ops) {
6277 if (!Op.getNode()) {
6278 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6279 break;
6280 }
6281 }
6282 }
6283 return GS.getNode(DAG, SDLoc(BVN));
6284}
6285
6286bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6287 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6288 return true;
6289 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6290 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6291 return true;
6292 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6293 return true;
6294 return false;
6295}
6296
6297// Combine GPR scalar values Elems into a vector of type VT.
6298SDValue
6299SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6300 SmallVectorImpl<SDValue> &Elems) const {
6301 // See whether there is a single replicated value.
6302 SDValue Single;
6303 unsigned int NumElements = Elems.size();
6304 unsigned int Count = 0;
6305 for (auto Elem : Elems) {
6306 if (!Elem.isUndef()) {
6307 if (!Single.getNode())
6308 Single = Elem;
6309 else if (Elem != Single) {
6310 Single = SDValue();
6311 break;
6312 }
6313 Count += 1;
6314 }
6315 }
6316 // There are three cases here:
6317 //
6318 // - if the only defined element is a loaded one, the best sequence
6319 // is a replicating load.
6320 //
6321 // - otherwise, if the only defined element is an i64 value, we will
6322 // end up with the same VLVGP sequence regardless of whether we short-cut
6323 // for replication or fall through to the later code.
6324 //
6325 // - otherwise, if the only defined element is an i32 or smaller value,
6326 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6327 // This is only a win if the single defined element is used more than once.
6328 // In other cases we're better off using a single VLVGx.
6329 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6330 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6331
6332 // If all elements are loads, use VLREP/VLEs (below).
6333 bool AllLoads = true;
6334 for (auto Elem : Elems)
6335 if (!isVectorElementLoad(Elem)) {
6336 AllLoads = false;
6337 break;
6338 }
6339
6340 // The best way of building a v2i64 from two i64s is to use VLVGP.
6341 if (VT == MVT::v2i64 && !AllLoads)
6342 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6343
6344 // Use a 64-bit merge high to combine two doubles.
6345 if (VT == MVT::v2f64 && !AllLoads)
6346 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6347
6348 // Build v4f32 values directly from the FPRs:
6349 //
6350 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6351 // V V VMRHF
6352 // <ABxx> <CDxx>
6353 // V VMRHG
6354 // <ABCD>
6355 if (VT == MVT::v4f32 && !AllLoads) {
6356 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6357 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6358 // Avoid unnecessary undefs by reusing the other operand.
6359 if (Op01.isUndef())
6360 Op01 = Op23;
6361 else if (Op23.isUndef())
6362 Op23 = Op01;
6363 // Merging identical replications is a no-op.
6364 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6365 return Op01;
6366 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6367 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6368 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6369 DL, MVT::v2i64, Op01, Op23);
6370 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6371 }
6372
6373 // Collect the constant terms.
6374 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6375 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
6376
6377 unsigned NumConstants = 0;
6378 for (unsigned I = 0; I < NumElements; ++I) {
6379 SDValue Elem = Elems[I];
6380 if (Elem.getOpcode() == ISD::Constant ||
6381 Elem.getOpcode() == ISD::ConstantFP) {
6382 NumConstants += 1;
6383 Constants[I] = Elem;
6384 Done[I] = true;
6385 }
6386 }
6387 // If there was at least one constant, fill in the other elements of
6388 // Constants with undefs to get a full vector constant and use that
6389 // as the starting point.
6391 SDValue ReplicatedVal;
6392 if (NumConstants > 0) {
6393 for (unsigned I = 0; I < NumElements; ++I)
6394 if (!Constants[I].getNode())
6395 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6396 Result = DAG.getBuildVector(VT, DL, Constants);
6397 } else {
6398 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6399 // avoid a false dependency on any previous contents of the vector
6400 // register.
6401
6402 // Use a VLREP if at least one element is a load. Make sure to replicate
6403 // the load with the most elements having its value.
6404 std::map<const SDNode*, unsigned> UseCounts;
6405 SDNode *LoadMaxUses = nullptr;
6406 for (unsigned I = 0; I < NumElements; ++I)
6407 if (isVectorElementLoad(Elems[I])) {
6408 SDNode *Ld = Elems[I].getNode();
6409 unsigned Count = ++UseCounts[Ld];
6410 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6411 LoadMaxUses = Ld;
6412 }
6413 if (LoadMaxUses != nullptr) {
6414 ReplicatedVal = SDValue(LoadMaxUses, 0);
6415 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6416 } else {
6417 // Try to use VLVGP.
6418 unsigned I1 = NumElements / 2 - 1;
6419 unsigned I2 = NumElements - 1;
6420 bool Def1 = !Elems[I1].isUndef();
6421 bool Def2 = !Elems[I2].isUndef();
6422 if (Def1 || Def2) {
6423 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6424 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6425 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6426 joinDwords(DAG, DL, Elem1, Elem2));
6427 Done[I1] = true;
6428 Done[I2] = true;
6429 } else
6430 Result = DAG.getUNDEF(VT);
6431 }
6432 }
6433
6434 // Use VLVGx to insert the other elements.
6435 for (unsigned I = 0; I < NumElements; ++I)
6436 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6437 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6438 DAG.getConstant(I, DL, MVT::i32));
6439 return Result;
6440}
6441
6442SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6443 SelectionDAG &DAG) const {
6444 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6445 SDLoc DL(Op);
6446 EVT VT = Op.getValueType();
6447
6448 if (BVN->isConstant()) {
6449 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6450 return Op;
6451
6452 // Fall back to loading it from memory.
6453 return SDValue();
6454 }
6455
6456 // See if we should use shuffles to construct the vector from other vectors.
6457 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6458 return Res;
6459
6460 // Detect SCALAR_TO_VECTOR conversions.
6461 if (isScalarToVector(Op))
6462 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6463
6464 // Otherwise use buildVector to build the vector up from GPRs.
6465 unsigned NumElements = Op.getNumOperands();
6466 SmallVector<SDValue, 16> Ops(NumElements);
6467 for (unsigned I = 0; I < NumElements; ++I)
6468 Ops[I] = Op.getOperand(I);
6469 return buildVector(DAG, DL, VT, Ops);
6470}
6471
6472SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6473 SelectionDAG &DAG) const {
6474 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6475 SDLoc DL(Op);
6476 EVT VT = Op.getValueType();
6477 unsigned NumElements = VT.getVectorNumElements();
6478
6479 if (VSN->isSplat()) {
6480 SDValue Op0 = Op.getOperand(0);
6481 unsigned Index = VSN->getSplatIndex();
6482 assert(Index < VT.getVectorNumElements() &&
6483 "Splat index should be defined and in first operand");
6484 // See whether the value we're splatting is directly available as a scalar.
6485 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6486 Op0.getOpcode() == ISD::BUILD_VECTOR)
6487 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6488 // Otherwise keep it as a vector-to-vector operation.
6489 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6490 DAG.getTargetConstant(Index, DL, MVT::i32));
6491 }
6492
6493 GeneralShuffle GS(VT);
6494 for (unsigned I = 0; I < NumElements; ++I) {
6495 int Elt = VSN->getMaskElt(I);
6496 if (Elt < 0)
6497 GS.addUndef();
6498 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6499 unsigned(Elt) % NumElements))
6500 return SDValue();
6501 }
6502 return GS.getNode(DAG, SDLoc(VSN));
6503}
6504
6505SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6506 SelectionDAG &DAG) const {
6507 SDLoc DL(Op);
6508 // Just insert the scalar into element 0 of an undefined vector.
6509 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6510 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6511 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6512}
6513
6514SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6515 SelectionDAG &DAG) const {
6516 // Handle insertions of floating-point values.
6517 SDLoc DL(Op);
6518 SDValue Op0 = Op.getOperand(0);
6519 SDValue Op1 = Op.getOperand(1);
6520 SDValue Op2 = Op.getOperand(2);
6521 EVT VT = Op.getValueType();
6522
6523 // Insertions into constant indices of a v2f64 can be done using VPDI.
6524 // However, if the inserted value is a bitcast or a constant then it's
6525 // better to use GPRs, as below.
6526 if (VT == MVT::v2f64 &&
6527 Op1.getOpcode() != ISD::BITCAST &&
6528 Op1.getOpcode() != ISD::ConstantFP &&
6529 Op2.getOpcode() == ISD::Constant) {
6530 uint64_t Index = Op2->getAsZExtVal();
6531 unsigned Mask = VT.getVectorNumElements() - 1;
6532 if (Index <= Mask)
6533 return Op;
6534 }
6535
6536 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6537 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6538 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6539 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6540 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6541 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6542 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6543}
6544
6545SDValue
6546SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6547 SelectionDAG &DAG) const {
6548 // Handle extractions of floating-point values.
6549 SDLoc DL(Op);
6550 SDValue Op0 = Op.getOperand(0);
6551 SDValue Op1 = Op.getOperand(1);
6552 EVT VT = Op.getValueType();
6553 EVT VecVT = Op0.getValueType();
6554
6555 // Extractions of constant indices can be done directly.
6556 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6557 uint64_t Index = CIndexN->getZExtValue();
6558 unsigned Mask = VecVT.getVectorNumElements() - 1;
6559 if (Index <= Mask)
6560 return Op;
6561 }
6562
6563 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6564 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6565 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6566 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6567 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6568 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6569}
6570
6571SDValue SystemZTargetLowering::
6572lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6573 SDValue PackedOp = Op.getOperand(0);
6574 EVT OutVT = Op.getValueType();
6575 EVT InVT = PackedOp.getValueType();
6576 unsigned ToBits = OutVT.getScalarSizeInBits();
6577 unsigned FromBits = InVT.getScalarSizeInBits();
6578 unsigned StartOffset = 0;
6579
6580 // If the input is a VECTOR_SHUFFLE, there are a number of important
6581 // cases where we can directly implement the sign-extension of the
6582 // original input lanes of the shuffle.
6583 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6584 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6585 ArrayRef<int> ShuffleMask = SVN->getMask();
6586 int OutNumElts = OutVT.getVectorNumElements();
6587
6588 // Recognize the special case where the sign-extension can be done
6589 // by the VSEG instruction. Handled via the default expander.
6590 if (ToBits == 64 && OutNumElts == 2) {
6591 int NumElem = ToBits / FromBits;
6592 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6593 return SDValue();
6594 }
6595
6596 // Recognize the special case where we can fold the shuffle by
6597 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6598 int StartOffsetCandidate = -1;
6599 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6600 if (ShuffleMask[Elt] == -1)
6601 continue;
6602 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6603 if (StartOffsetCandidate == -1)
6604 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6605 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6606 continue;
6607 }
6608 StartOffsetCandidate = -1;
6609 break;
6610 }
6611 if (StartOffsetCandidate != -1) {
6612 StartOffset = StartOffsetCandidate;
6613 PackedOp = PackedOp.getOperand(0);
6614 }
6615 }
6616
6617 do {
6618 FromBits *= 2;
6619 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6620 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6621 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6622 if (StartOffset >= OutNumElts) {
6623 Opcode = SystemZISD::UNPACK_LOW;
6624 StartOffset -= OutNumElts;
6625 }
6626 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6627 } while (FromBits != ToBits);
6628 return PackedOp;
6629}
6630
6631// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6632SDValue SystemZTargetLowering::
6633lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6634 SDValue PackedOp = Op.getOperand(0);
6635 SDLoc DL(Op);
6636 EVT OutVT = Op.getValueType();
6637 EVT InVT = PackedOp.getValueType();
6638 unsigned InNumElts = InVT.getVectorNumElements();
6639 unsigned OutNumElts = OutVT.getVectorNumElements();
6640 unsigned NumInPerOut = InNumElts / OutNumElts;
6641
6642 SDValue ZeroVec =
6643 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6644
6645 SmallVector<int, 16> Mask(InNumElts);
6646 unsigned ZeroVecElt = InNumElts;
6647 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6648 unsigned MaskElt = PackedElt * NumInPerOut;
6649 unsigned End = MaskElt + NumInPerOut - 1;
6650 for (; MaskElt < End; MaskElt++)
6651 Mask[MaskElt] = ZeroVecElt++;
6652 Mask[MaskElt] = PackedElt;
6653 }
6654 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6655 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6656}
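// Worked example of the mask built above for a big-endian zero extension:
// extending v4i32 to v2i64 (NumInPerOut = 2) yields the mask {4, 0, 5, 1},
// i.e. each output doubleword is a zero element followed by the corresponding
// input element.  Standalone sketch of the same loop.
#include <vector>

std::vector<int> zextInRegMask(unsigned InNumElts, unsigned OutNumElts) {
  unsigned NumInPerOut = InNumElts / OutNumElts;
  std::vector<int> Mask(InNumElts);
  unsigned ZeroVecElt = InNumElts; // zero operand follows the input elements
  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
    unsigned MaskElt = PackedElt * NumInPerOut;
    unsigned End = MaskElt + NumInPerOut - 1;
    for (; MaskElt < End; MaskElt++)
      Mask[MaskElt] = ZeroVecElt++;
    Mask[MaskElt] = PackedElt;
  }
  return Mask; // zextInRegMask(4, 2) == {4, 0, 5, 1}
}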
6657
6658SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6659 unsigned ByScalar) const {
6660 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6661 SDValue Op0 = Op.getOperand(0);
6662 SDValue Op1 = Op.getOperand(1);
6663 SDLoc DL(Op);
6664 EVT VT = Op.getValueType();
6665 unsigned ElemBitSize = VT.getScalarSizeInBits();
6666
6667 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6668 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6669 APInt SplatBits, SplatUndef;
6670 unsigned SplatBitSize;
6671 bool HasAnyUndefs;
6672 // Check for constant splats. Use ElemBitSize as the minimum element
6673 // width and reject splats that need wider elements.
6674 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6675 ElemBitSize, true) &&
6676 SplatBitSize == ElemBitSize) {
6677 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6678 DL, MVT::i32);
6679 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6680 }
6681 // Check for variable splats.
6682 BitVector UndefElements;
6683 SDValue Splat = BVN->getSplatValue(&UndefElements);
6684 if (Splat) {
6685 // Since i32 is the smallest legal type, we either need a no-op
6686 // or a truncation.
6687 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6688 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6689 }
6690 }
6691
6692 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6693 // and the shift amount is directly available in a GPR.
6694 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6695 if (VSN->isSplat()) {
6696 SDValue VSNOp0 = VSN->getOperand(0);
6697 unsigned Index = VSN->getSplatIndex();
6698 assert(Index < VT.getVectorNumElements() &&
6699 "Splat index should be defined and in first operand");
6700 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6701 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6702 // Since i32 is the smallest legal type, we either need a no-op
6703 // or a truncation.
6704 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6705 VSNOp0.getOperand(Index));
6706 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6707 }
6708 }
6709 }
6710
6711 // Otherwise just treat the current form as legal.
6712 return Op;
6713}
6714
6715SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6716 SDLoc DL(Op);
6717
6718 // i128 FSHL with a constant amount that is a multiple of 8 can be
6719 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6720 // facility, FSHL with a constant amount less than 8 can be implemented
6721 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6722 // combination of the two.
6723 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6724 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6725 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6726 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6727 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6728 if (ShiftAmt > 120) {
6729 // For N in 121..128, fshl N == fshr (128 - N), and for 1 <= N < 8
6730 // SHR_DOUBLE_BIT emits fewer instructions.
6731 SDValue Val =
6732 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6733 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6734 return DAG.getBitcast(MVT::i128, Val);
6735 }
6736 SmallVector<int, 16> Mask(16);
6737 for (unsigned Elt = 0; Elt < 16; Elt++)
6738 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6739 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6740 if ((ShiftAmt & 7) == 0)
6741 return DAG.getBitcast(MVT::i128, Shuf1);
6742 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6743 SDValue Val =
6744 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6745 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6746 return DAG.getBitcast(MVT::i128, Val);
6747 }
6748 }
6749
6750 return SDValue();
6751}
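// Standalone sketch of the byte shuffle used above: for an i128 funnel shift
// left by a multiple of 8, the result is the 16 bytes starting at offset
// ShiftAmt/8 of the 32-byte big-endian concatenation of the two inputs, which
// is exactly what the mask (ShiftAmt >> 3) + Elt selects.
#include <array>
#include <cstdint>

std::array<uint8_t, 16> fshlBytes(const std::array<uint8_t, 16> &Hi,
                                  const std::array<uint8_t, 16> &Lo,
                                  unsigned ShiftAmt) { // multiple of 8, < 128
  std::array<uint8_t, 16> Result;
  for (unsigned I = 0; I < 16; ++I) {
    unsigned Src = (ShiftAmt >> 3) + I;
    Result[I] = Src < 16 ? Hi[Src] : Lo[Src - 16];
  }
  return Result;
}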
6752
6753SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6754 SDLoc DL(Op);
6755
6756 // i128 FSHR with a constant amount that is a multiple of 8 can be
6757 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6758 // facility, FSHR with a constant amount less than 8 can be implemented
6759 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6760 // combination of the two.
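// For example, ShiftAmt == 12 decomposes into a whole-byte part (12 >> 3 == 1)
// handled by the byte shuffle below and a residual bit part (12 & 7 == 4)
// handled by SHR_DOUBLE_BIT.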
6761 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6762 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6763 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6764 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6765 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6766 if (ShiftAmt > 120) {
6767 // For N in 121..127, fshr N == fshl (128 - N), and for 1 <= N < 8
6768 // SHL_DOUBLE_BIT emits fewer instructions.
6769 SDValue Val =
6770 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6771 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6772 return DAG.getBitcast(MVT::i128, Val);
6773 }
6774 SmallVector<int, 16> Mask(16);
6775 for (unsigned Elt = 0; Elt < 16; Elt++)
6776 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6777 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6778 if ((ShiftAmt & 7) == 0)
6779 return DAG.getBitcast(MVT::i128, Shuf1);
6780 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6781 SDValue Val =
6782 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6783 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6784 return DAG.getBitcast(MVT::i128, Val);
6785 }
6786 }
6787
6788 return SDValue();
6789}
6790
6791 SDValue SystemZTargetLowering::lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) const {
6792 SDLoc DL(Op);
6793 SDValue Src = Op.getOperand(0);
6794 MVT DstVT = Op.getSimpleValueType();
6795
6796 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
6797 unsigned SrcAS = N->getSrcAddressSpace();
6798
6799 assert(SrcAS != N->getDestAddressSpace() &&
6800 "addrspacecast must be between different address spaces");
6801
6802 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6803 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
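// In both directions the top bit of the 32-bit value is masked off, since
// only the low 31 bits of a ptr32 pointer are significant.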
6804 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6805 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6806 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6807 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6808 } else if (DstVT == MVT::i32) {
6809 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
6810 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
6811 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6812 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6813 } else {
6814 report_fatal_error("Bad address space in addrspacecast");
6815 }
6816 return Op;
6817}
6818
6819SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6820 SelectionDAG &DAG) const {
6821 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
6822 if (In.getSimpleValueType() != MVT::f16)
6823 return Op; // Legal
6824 return SDValue(); // Let legalizer emit the libcall.
6825}
6826
6827 SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
6828 MVT VT, SDValue Arg, SDLoc DL,
6829 SDValue Chain, bool IsStrict) const {
6830 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
6831 MakeLibCallOptions CallOptions;
6832 SDValue Result;
6833 std::tie(Result, Chain) =
6834 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
6835 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
6836}
6837
6838SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
6839 SelectionDAG &DAG) const {
6840 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
6841 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
6842 bool IsStrict = Op->isStrictFPOpcode();
6843 SDLoc DL(Op);
6844 MVT VT = Op.getSimpleValueType();
6845 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6846 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6847 EVT InVT = InOp.getValueType();
6848
6849 // FP to unsigned is not directly supported on z10. Promoting an i32
6850 // result to (signed) i64 doesn't generate an inexact condition (fp
6851 // exception) for values that are outside the i32 range but in the i64
6852 // range, so use the default expansion.
6853 if (!Subtarget.hasFPExtension() && !IsSigned)
6854 // Expand i32/i64. F16 values will be recognized to fit and extended.
6855 return SDValue();
6856
6857 // Conversion from f16 is done via f32.
6858 if (InOp.getSimpleValueType() == MVT::f16) {
6859 SmallVector<SDValue, 2> Results;
6860 LowerOperationWrapper(Op.getNode(), Results, DAG);
6861 return DAG.getMergeValues(Results, DL);
6862 }
6863
6864 if (VT == MVT::i128) {
6865 RTLIB::Libcall LC =
6866 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
6867 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6868 }
6869
6870 return Op; // Legal
6871}
6872
6873SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
6874 SelectionDAG &DAG) const {
6875 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
6876 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
6877 bool IsStrict = Op->isStrictFPOpcode();
6878 SDLoc DL(Op);
6879 MVT VT = Op.getSimpleValueType();
6880 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6881 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6882 EVT InVT = InOp.getValueType();
6883
6884 // Conversion to f16 is done via f32.
6885 if (VT == MVT::f16) {
6886 SmallVector<SDValue, 2> Results;
6887 LowerOperationWrapper(Op.getNode(), Results, DAG);
6888 return DAG.getMergeValues(Results, DL);
6889 }
6890
6891 // Unsigned to fp is not directly supported on z10.
6892 if (!Subtarget.hasFPExtension() && !IsSigned)
6893 return SDValue(); // Expand i64.
6894
6895 if (InVT == MVT::i128) {
6896 RTLIB::Libcall LC =
6897 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
6898 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6899 }
6900
6901 return Op; // Legal
6902}
6903
6904// Shift the lower 2 bytes of Op to the left in order to insert into the
6905// upper 2 bytes of the FP register.
6906 static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
6907 assert(Op.getSimpleValueType() == MVT::i64 &&
6908 "Expected to convert i64 to f16.");
6909 SDLoc DL(Op);
6910 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6911 DAG.getConstant(48, DL, MVT::i64));
6912 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6913 SDValue F16Val =
6914 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6915 return F16Val;
6916}
6917
6918// Extract Op into GPR and shift the 2 f16 bytes to the right.
6919 static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
6920 assert(Op.getSimpleValueType() == MVT::f16 &&
6921 "Expected to convert f16 to i64.");
6922 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6923 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6924 SDValue(U32, 0), Op);
6925 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6926 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6927 DAG.getConstant(48, DL, MVT::i32));
6928 return Shft;
6929}
6930
6931// Lower an f16 LOAD in case of no vector support.
6932SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
6933 SelectionDAG &DAG) const {
6934 EVT RegVT = Op.getValueType();
6935 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
6936 (void)RegVT;
6937
6938 // Load as integer.
6939 SDLoc DL(Op);
6940 SDValue NewLd;
6941 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
6942 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
6943 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
6944 AtomicLd->getChain(), AtomicLd->getBasePtr(),
6945 AtomicLd->getMemOperand());
6946 } else {
6947 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
6948 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
6949 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
6950 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
6951 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
6952 }
6953 SDValue F16Val = convertToF16(NewLd, DAG);
6954 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
6955}
6956
6957// Lower an f16 STORE in case of no vector support.
6958SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
6959 SelectionDAG &DAG) const {
6960 SDLoc DL(Op);
6961 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
6962
6963 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
6964 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
6965 Shft, AtomicSt->getBasePtr(),
6966 AtomicSt->getMemOperand());
6967
6968 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
6969 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
6970 St->getMemOperand());
6971}
6972
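// Lower IS_FPCLASS by translating the requested class test bits into the
// operand mask of a TEST DATA CLASS (TDC) operation and reading the result
// back from CC.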
6973SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6974 SelectionDAG &DAG) const {
6975 SDLoc DL(Op);
6976 MVT ResultVT = Op.getSimpleValueType();
6977 SDValue Arg = Op.getOperand(0);
6978 unsigned Check = Op.getConstantOperandVal(1);
6979
6980 unsigned TDCMask = 0;
6981 if (Check & fcSNan)
6982 TDCMask |= SystemZ::TDCMASK_SNAN;
6983 if (Check & fcQNan)
6984 TDCMask |= SystemZ::TDCMASK_QNAN;
6985 if (Check & fcPosInf)
6986 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6987 if (Check & fcNegInf)
6988 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6989 if (Check & fcPosNormal)
6990 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6991 if (Check & fcNegNormal)
6992 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6993 if (Check & fcPosSubnormal)
6994 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6995 if (Check & fcNegSubnormal)
6996 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6997 if (Check & fcPosZero)
6998 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6999 if (Check & fcNegZero)
7000 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7001 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7002
7003 if (Arg.getSimpleValueType() == MVT::f16)
7004 Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
7005 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7006 return getCCResult(DAG, Intr);
7007}
7008
7009SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7010 SelectionDAG &DAG) const {
7011 SDLoc DL(Op);
7012 SDValue Chain = Op.getOperand(0);
7013
7014 // STCKF only supports a memory operand, so we have to use a temporary.
7015 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7016 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7017 MachinePointerInfo MPI =
7018 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
7019
7020 // Use STCKF to store the TOD clock into the temporary.
7021 SDValue StoreOps[] = {Chain, StackPtr};
7022 Chain = DAG.getMemIntrinsicNode(
7023 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7024 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7025
7026 // And read it back from there.
7027 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7028}
7029
7030 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
7031 SelectionDAG &DAG) const {
7032 switch (Op.getOpcode()) {
7033 case ISD::FRAMEADDR:
7034 return lowerFRAMEADDR(Op, DAG);
7035 case ISD::RETURNADDR:
7036 return lowerRETURNADDR(Op, DAG);
7037 case ISD::BR_CC:
7038 return lowerBR_CC(Op, DAG);
7039 case ISD::SELECT_CC:
7040 return lowerSELECT_CC(Op, DAG);
7041 case ISD::SETCC:
7042 return lowerSETCC(Op, DAG);
7043 case ISD::STRICT_FSETCC:
7044 return lowerSTRICT_FSETCC(Op, DAG, false);
7045 case ISD::STRICT_FSETCCS:
7046 return lowerSTRICT_FSETCC(Op, DAG, true);
7047 case ISD::GlobalAddress:
7048 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
7049 case ISD::GlobalTLSAddress:
7050 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7051 case ISD::BlockAddress:
7052 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7053 case ISD::JumpTable:
7054 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7055 case ISD::ConstantPool:
7056 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7057 case ISD::BITCAST:
7058 return lowerBITCAST(Op, DAG);
7059 case ISD::VASTART:
7060 return lowerVASTART(Op, DAG);
7061 case ISD::VACOPY:
7062 return lowerVACOPY(Op, DAG);
7063 case ISD::DYNAMIC_STACKALLOC:
7064 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7065 case ISD::GET_DYNAMIC_AREA_OFFSET:
7066 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7067 case ISD::MULHS:
7068 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7069 case ISD::MULHU:
7070 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7071 case ISD::SMUL_LOHI:
7072 return lowerSMUL_LOHI(Op, DAG);
7073 case ISD::UMUL_LOHI:
7074 return lowerUMUL_LOHI(Op, DAG);
7075 case ISD::SDIVREM:
7076 return lowerSDIVREM(Op, DAG);
7077 case ISD::UDIVREM:
7078 return lowerUDIVREM(Op, DAG);
7079 case ISD::SADDO:
7080 case ISD::SSUBO:
7081 case ISD::UADDO:
7082 case ISD::USUBO:
7083 return lowerXALUO(Op, DAG);
7084 case ISD::UADDO_CARRY:
7085 case ISD::USUBO_CARRY:
7086 return lowerUADDSUBO_CARRY(Op, DAG);
7087 case ISD::OR:
7088 return lowerOR(Op, DAG);
7089 case ISD::CTPOP:
7090 return lowerCTPOP(Op, DAG);
7091 case ISD::VECREDUCE_ADD:
7092 return lowerVECREDUCE_ADD(Op, DAG);
7093 case ISD::ATOMIC_FENCE:
7094 return lowerATOMIC_FENCE(Op, DAG);
7095 case ISD::ATOMIC_SWAP:
7096 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7097 case ISD::ATOMIC_STORE:
7098 return lowerATOMIC_STORE(Op, DAG);
7099 case ISD::ATOMIC_LOAD:
7100 return lowerATOMIC_LOAD(Op, DAG);
7101 case ISD::ATOMIC_LOAD_ADD:
7102 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7103 case ISD::ATOMIC_LOAD_SUB:
7104 return lowerATOMIC_LOAD_SUB(Op, DAG);
7105 case ISD::ATOMIC_LOAD_AND:
7106 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7107 case ISD::ATOMIC_LOAD_OR:
7108 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7109 case ISD::ATOMIC_LOAD_XOR:
7110 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7111 case ISD::ATOMIC_LOAD_NAND:
7112 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7113 case ISD::ATOMIC_LOAD_MIN:
7114 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7115 case ISD::ATOMIC_LOAD_MAX:
7116 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7117 case ISD::ATOMIC_LOAD_UMIN:
7118 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7119 case ISD::ATOMIC_LOAD_UMAX:
7120 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7121 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
7122 return lowerATOMIC_CMP_SWAP(Op, DAG);
7123 case ISD::STACKSAVE:
7124 return lowerSTACKSAVE(Op, DAG);
7125 case ISD::STACKRESTORE:
7126 return lowerSTACKRESTORE(Op, DAG);
7127 case ISD::PREFETCH:
7128 return lowerPREFETCH(Op, DAG);
7129 case ISD::INTRINSIC_W_CHAIN:
7130 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7131 case ISD::INTRINSIC_WO_CHAIN:
7132 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7133 case ISD::BUILD_VECTOR:
7134 return lowerBUILD_VECTOR(Op, DAG);
7135 case ISD::VECTOR_SHUFFLE:
7136 return lowerVECTOR_SHUFFLE(Op, DAG);
7137 case ISD::SCALAR_TO_VECTOR:
7138 return lowerSCALAR_TO_VECTOR(Op, DAG);
7139 case ISD::INSERT_VECTOR_ELT:
7140 return lowerINSERT_VECTOR_ELT(Op, DAG);
7141 case ISD::EXTRACT_VECTOR_ELT:
7142 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7143 case ISD::SIGN_EXTEND_VECTOR_INREG:
7144 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7145 case ISD::ZERO_EXTEND_VECTOR_INREG:
7146 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7147 case ISD::SHL:
7148 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7149 case ISD::SRL:
7150 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7151 case ISD::SRA:
7152 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7153 case ISD::ADDRSPACECAST:
7154 return lowerAddrSpaceCast(Op, DAG);
7155 case ISD::ROTL:
7156 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7157 case ISD::FSHL:
7158 return lowerFSHL(Op, DAG);
7159 case ISD::FSHR:
7160 return lowerFSHR(Op, DAG);
7161 case ISD::FP_EXTEND:
7162 case ISD::STRICT_FP_EXTEND:
7163 return lowerFP_EXTEND(Op, DAG);
7164 case ISD::FP_TO_UINT:
7165 case ISD::FP_TO_SINT:
7166 case ISD::STRICT_FP_TO_UINT:
7167 case ISD::STRICT_FP_TO_SINT:
7168 return lower_FP_TO_INT(Op, DAG);
7169 case ISD::UINT_TO_FP:
7170 case ISD::SINT_TO_FP:
7171 case ISD::STRICT_UINT_TO_FP:
7172 case ISD::STRICT_SINT_TO_FP:
7173 return lower_INT_TO_FP(Op, DAG);
7174 case ISD::LOAD:
7175 return lowerLoadF16(Op, DAG);
7176 case ISD::STORE:
7177 return lowerStoreF16(Op, DAG);
7178 case ISD::IS_FPCLASS:
7179 return lowerIS_FPCLASS(Op, DAG);
7180 case ISD::GET_ROUNDING:
7181 return lowerGET_ROUNDING(Op, DAG);
7182 case ISD::READCYCLECOUNTER:
7183 return lowerREADCYCLECOUNTER(Op, DAG);
7184 case ISD::EH_SJLJ_SETJMP:
7185 case ISD::EH_SJLJ_LONGJMP:
7186 // These operations are legal on our platform, but we cannot actually
7187 // set the operation action to Legal as common code would treat this
7188 // as equivalent to Expand. Instead, we keep the operation action set to
7189 // Custom and just leave them unchanged here.
7190 return Op;
7191
7192 default:
7193 llvm_unreachable("Unexpected node to lower");
7194 }
7195}
7196
7197 static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
7198 const SDLoc &SL) {
7199 // If i128 is legal, just use a normal bitcast.
7200 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7201 return DAG.getBitcast(MVT::f128, Src);
7202
7203 // Otherwise, f128 must live in FP128, so do a partwise move.
7204 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7205 &SystemZ::FP128BitRegClass);
7206
7207 SDValue Hi, Lo;
7208 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7209
7210 Hi = DAG.getBitcast(MVT::f64, Hi);
7211 Lo = DAG.getBitcast(MVT::f64, Lo);
7212
7213 SDNode *Pair = DAG.getMachineNode(
7214 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7215 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7216 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7217 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7218 return SDValue(Pair, 0);
7219}
7220
7221 static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
7222 const SDLoc &SL) {
7223 // If i128 is legal, just use a normal bitcast.
7224 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7225 return DAG.getBitcast(MVT::i128, Src);
7226
7227 // Otherwise, f128 must live in FP128, so do a partwise move.
7228 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7229 &SystemZ::FP128BitRegClass);
7230
7231 SDValue LoFP =
7232 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7233 SDValue HiFP =
7234 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7235 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7236 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7237
7238 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7239}
7240
7241// Lower operations with invalid operand or result types.
7242void
7243 SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
7244 SmallVectorImpl<SDValue> &Results,
7245 SelectionDAG &DAG) const {
7246 switch (N->getOpcode()) {
7247 case ISD::ATOMIC_LOAD: {
7248 SDLoc DL(N);
7249 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7250 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7251 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7252 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7253 DL, Tys, Ops, MVT::i128, MMO);
7254
7255 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7256 if (N->getValueType(0) == MVT::f128)
7257 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7258 Results.push_back(Lowered);
7259 Results.push_back(Res.getValue(1));
7260 break;
7261 }
7262 case ISD::ATOMIC_STORE: {
7263 SDLoc DL(N);
7264 SDVTList Tys = DAG.getVTList(MVT::Other);
7265 SDValue Val = N->getOperand(1);
7266 if (Val.getValueType() == MVT::f128)
7267 Val = expandBitCastF128ToI128(DAG, Val, DL);
7268 Val = lowerI128ToGR128(DAG, Val);
7269
7270 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7271 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7272 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7273 DL, Tys, Ops, MVT::i128, MMO);
7274 // We have to enforce sequential consistency by performing a
7275 // serialization operation after the store.
7276 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
7277 AtomicOrdering::SequentiallyConsistent)
7278 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7279 MVT::Other, Res), 0);
7280 Results.push_back(Res);
7281 break;
7282 }
7284 SDLoc DL(N);
7285 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7286 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7287 lowerI128ToGR128(DAG, N->getOperand(2)),
7288 lowerI128ToGR128(DAG, N->getOperand(3)) };
7289 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7290 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7291 DL, Tys, Ops, MVT::i128, MMO);
7292 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
7293 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
7294 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7295 Results.push_back(lowerGR128ToI128(DAG, Res));
7296 Results.push_back(Success);
7297 Results.push_back(Res.getValue(2));
7298 break;
7299 }
7300 case ISD::BITCAST: {
7301 if (useSoftFloat())
7302 return;
7303 SDLoc DL(N);
7304 SDValue Src = N->getOperand(0);
7305 EVT SrcVT = Src.getValueType();
7306 EVT ResVT = N->getValueType(0);
7307 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7308 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7309 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7310 if (Subtarget.hasVector()) {
7311 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7312 Results.push_back(SDValue(
7313 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7314 } else {
7315 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7316 Results.push_back(convertToF16(In64, DAG));
7317 }
7318 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7319 SDValue ExtractedVal =
7320 Subtarget.hasVector()
7321 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7322 0)
7323 : convertFromF16(Src, DL, DAG);
7324 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7325 }
7326 break;
7327 }
7328 case ISD::UINT_TO_FP:
7329 case ISD::SINT_TO_FP:
7330 case ISD::STRICT_UINT_TO_FP:
7331 case ISD::STRICT_SINT_TO_FP: {
7332 if (useSoftFloat())
7333 return;
7334 bool IsStrict = N->isStrictFPOpcode();
7335 SDLoc DL(N);
7336 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7337 EVT ResVT = N->getValueType(0);
7338 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7339 if (ResVT == MVT::f16) {
7340 if (!IsStrict) {
7341 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7342 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7343 } else {
7344 SDValue OpF32 =
7345 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7346 {Chain, InOp});
7347 SDValue F16Res;
7348 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7349 OpF32, OpF32.getValue(1), DL, MVT::f16);
7350 Results.push_back(F16Res);
7351 Results.push_back(Chain);
7352 }
7353 }
7354 break;
7355 }
7356 case ISD::FP_TO_UINT:
7357 case ISD::FP_TO_SINT:
7358 case ISD::STRICT_FP_TO_UINT:
7359 case ISD::STRICT_FP_TO_SINT: {
7360 if (useSoftFloat())
7361 return;
7362 bool IsStrict = N->isStrictFPOpcode();
7363 SDLoc DL(N);
7364 EVT ResVT = N->getValueType(0);
7365 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7366 EVT InVT = InOp->getValueType(0);
7367 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7368 if (InVT == MVT::f16) {
7369 if (!IsStrict) {
7370 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7371 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7372 } else {
7373 SDValue InF32;
7374 std::tie(InF32, Chain) =
7375 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7376 SDValue OpF32 =
7377 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7378 {Chain, InF32});
7379 Results.push_back(OpF32);
7380 Results.push_back(OpF32.getValue(1));
7381 }
7382 }
7383 break;
7384 }
7385 default:
7386 llvm_unreachable("Unexpected node to lower");
7387 }
7388}
7389
7390void
7391 SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
7392 SmallVectorImpl<SDValue> &Results,
7393 SelectionDAG &DAG) const {
7394 return LowerOperationWrapper(N, Results, DAG);
7395}
7396
7397const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
7398#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
7399 switch ((SystemZISD::NodeType)Opcode) {
7400 case SystemZISD::FIRST_NUMBER: break;
7401 OPCODE(RET_GLUE);
7402 OPCODE(CALL);
7403 OPCODE(SIBCALL);
7404 OPCODE(TLS_GDCALL);
7405 OPCODE(TLS_LDCALL);
7406 OPCODE(PCREL_WRAPPER);
7407 OPCODE(PCREL_OFFSET);
7408 OPCODE(ICMP);
7409 OPCODE(FCMP);
7410 OPCODE(STRICT_FCMP);
7411 OPCODE(STRICT_FCMPS);
7412 OPCODE(TM);
7413 OPCODE(BR_CCMASK);
7414 OPCODE(SELECT_CCMASK);
7415 OPCODE(ADJDYNALLOC);
7416 OPCODE(PROBED_ALLOCA);
7417 OPCODE(POPCNT);
7418 OPCODE(SMUL_LOHI);
7419 OPCODE(UMUL_LOHI);
7420 OPCODE(SDIVREM);
7421 OPCODE(UDIVREM);
7422 OPCODE(SADDO);
7423 OPCODE(SSUBO);
7424 OPCODE(UADDO);
7425 OPCODE(USUBO);
7426 OPCODE(ADDCARRY);
7427 OPCODE(SUBCARRY);
7428 OPCODE(GET_CCMASK);
7429 OPCODE(MVC);
7430 OPCODE(NC);
7431 OPCODE(OC);
7432 OPCODE(XC);
7433 OPCODE(CLC);
7434 OPCODE(MEMSET_MVC);
7435 OPCODE(STPCPY);
7436 OPCODE(STRCMP);
7437 OPCODE(SEARCH_STRING);
7438 OPCODE(IPM);
7439 OPCODE(TBEGIN);
7440 OPCODE(TBEGIN_NOFLOAT);
7441 OPCODE(TEND);
7442 OPCODE(BYTE_MASK);
7443 OPCODE(ROTATE_MASK);
7444 OPCODE(REPLICATE);
7445 OPCODE(JOIN_DWORDS);
7446 OPCODE(SPLAT);
7447 OPCODE(MERGE_HIGH);
7448 OPCODE(MERGE_LOW);
7449 OPCODE(SHL_DOUBLE);
7450 OPCODE(PERMUTE_DWORDS);
7451 OPCODE(PERMUTE);
7452 OPCODE(PACK);
7453 OPCODE(PACKS_CC);
7454 OPCODE(PACKLS_CC);
7455 OPCODE(UNPACK_HIGH);
7456 OPCODE(UNPACKL_HIGH);
7457 OPCODE(UNPACK_LOW);
7458 OPCODE(UNPACKL_LOW);
7459 OPCODE(VSHL_BY_SCALAR);
7460 OPCODE(VSRL_BY_SCALAR);
7461 OPCODE(VSRA_BY_SCALAR);
7462 OPCODE(VROTL_BY_SCALAR);
7463 OPCODE(SHL_DOUBLE_BIT);
7464 OPCODE(SHR_DOUBLE_BIT);
7465 OPCODE(VSUM);
7466 OPCODE(VACC);
7467 OPCODE(VSCBI);
7468 OPCODE(VAC);
7469 OPCODE(VSBI);
7470 OPCODE(VACCC);
7471 OPCODE(VSBCBI);
7472 OPCODE(VMAH);
7473 OPCODE(VMALH);
7474 OPCODE(VME);
7475 OPCODE(VMLE);
7476 OPCODE(VMO);
7477 OPCODE(VMLO);
7478 OPCODE(VICMPE);
7479 OPCODE(VICMPH);
7480 OPCODE(VICMPHL);
7481 OPCODE(VICMPES);
7482 OPCODE(VICMPHS);
7483 OPCODE(VICMPHLS);
7484 OPCODE(VFCMPE);
7485 OPCODE(STRICT_VFCMPE);
7486 OPCODE(STRICT_VFCMPES);
7487 OPCODE(VFCMPH);
7488 OPCODE(STRICT_VFCMPH);
7489 OPCODE(STRICT_VFCMPHS);
7490 OPCODE(VFCMPHE);
7491 OPCODE(STRICT_VFCMPHE);
7492 OPCODE(STRICT_VFCMPHES);
7493 OPCODE(VFCMPES);
7494 OPCODE(VFCMPHS);
7495 OPCODE(VFCMPHES);
7496 OPCODE(VFTCI);
7497 OPCODE(VEXTEND);
7498 OPCODE(STRICT_VEXTEND);
7499 OPCODE(VROUND);
7500 OPCODE(STRICT_VROUND);
7501 OPCODE(VTM);
7502 OPCODE(SCMP128HI);
7503 OPCODE(UCMP128HI);
7504 OPCODE(VFAE_CC);
7505 OPCODE(VFAEZ_CC);
7506 OPCODE(VFEE_CC);
7507 OPCODE(VFEEZ_CC);
7508 OPCODE(VFENE_CC);
7509 OPCODE(VFENEZ_CC);
7510 OPCODE(VISTR_CC);
7511 OPCODE(VSTRC_CC);
7512 OPCODE(VSTRCZ_CC);
7513 OPCODE(VSTRS_CC);
7514 OPCODE(VSTRSZ_CC);
7515 OPCODE(TDC);
7516 OPCODE(ATOMIC_SWAPW);
7517 OPCODE(ATOMIC_LOADW_ADD);
7518 OPCODE(ATOMIC_LOADW_SUB);
7519 OPCODE(ATOMIC_LOADW_AND);
7520 OPCODE(ATOMIC_LOADW_OR);
7521 OPCODE(ATOMIC_LOADW_XOR);
7522 OPCODE(ATOMIC_LOADW_NAND);
7523 OPCODE(ATOMIC_LOADW_MIN);
7524 OPCODE(ATOMIC_LOADW_MAX);
7525 OPCODE(ATOMIC_LOADW_UMIN);
7526 OPCODE(ATOMIC_LOADW_UMAX);
7527 OPCODE(ATOMIC_CMP_SWAPW);
7528 OPCODE(ATOMIC_CMP_SWAP);
7529 OPCODE(ATOMIC_LOAD_128);
7530 OPCODE(ATOMIC_STORE_128);
7531 OPCODE(ATOMIC_CMP_SWAP_128);
7532 OPCODE(LRV);
7533 OPCODE(STRV);
7534 OPCODE(VLER);
7535 OPCODE(VSTER);
7536 OPCODE(STCKF);
7537 OPCODE(PREFETCH);
7538 OPCODE(ADA_ENTRY);
7539 }
7540 return nullptr;
7541#undef OPCODE
7542}
7543
7544// Return true if VT is a vector whose elements are a whole number of bytes
7545// in width. Also check for presence of vector support.
7546bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7547 if (!Subtarget.hasVector())
7548 return false;
7549
7550 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7551}
7552
7553// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7554// producing a result of type ResVT. Op is a possibly bitcast version
7555// of the input vector and Index is the index (based on type VecVT) that
7556// should be extracted. Return the new extraction if a simplification
7557// was possible or if Force is true.
7558SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7559 EVT VecVT, SDValue Op,
7560 unsigned Index,
7561 DAGCombinerInfo &DCI,
7562 bool Force) const {
7563 SelectionDAG &DAG = DCI.DAG;
7564
7565 // The number of bytes being extracted.
7566 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7567
7568 for (;;) {
7569 unsigned Opcode = Op.getOpcode();
7570 if (Opcode == ISD::BITCAST)
7571 // Look through bitcasts.
7572 Op = Op.getOperand(0);
7573 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7574 canTreatAsByteVector(Op.getValueType())) {
7575 // Get a VPERM-like permute mask and see whether the bytes covered
7576 // by the extracted element are a contiguous sequence from one
7577 // source operand.
7578 SmallVector<int, SystemZ::VectorBytes> Bytes;
7579 if (!getVPermMask(Op, Bytes))
7580 break;
7581 int First;
7582 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7583 BytesPerElement, First))
7584 break;
7585 if (First < 0)
7586 return DAG.getUNDEF(ResVT);
7587 // Make sure the contiguous sequence starts at a multiple of the
7588 // original element size.
7589 unsigned Byte = unsigned(First) % Bytes.size();
7590 if (Byte % BytesPerElement != 0)
7591 break;
7592 // We can get the extracted value directly from an input.
7593 Index = Byte / BytesPerElement;
7594 Op = Op.getOperand(unsigned(First) / Bytes.size());
7595 Force = true;
7596 } else if (Opcode == ISD::BUILD_VECTOR &&
7597 canTreatAsByteVector(Op.getValueType())) {
7598 // We can only optimize this case if the BUILD_VECTOR elements are
7599 // at least as wide as the extracted value.
7600 EVT OpVT = Op.getValueType();
7601 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7602 if (OpBytesPerElement < BytesPerElement)
7603 break;
7604 // Make sure that the least-significant bit of the extracted value
7605 // is the least significant bit of an input.
7606 unsigned End = (Index + 1) * BytesPerElement;
7607 if (End % OpBytesPerElement != 0)
7608 break;
7609 // We're extracting the low part of one operand of the BUILD_VECTOR.
7610 Op = Op.getOperand(End / OpBytesPerElement - 1);
7611 if (!Op.getValueType().isInteger()) {
7612 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7613 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7614 DCI.AddToWorklist(Op.getNode());
7615 }
7616 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7617 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7618 if (VT != ResVT) {
7619 DCI.AddToWorklist(Op.getNode());
7620 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7621 }
7622 return Op;
7623 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7624 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
7625 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7626 canTreatAsByteVector(Op.getValueType()) &&
7627 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7628 // Make sure that only the unextended bits are significant.
7629 EVT ExtVT = Op.getValueType();
7630 EVT OpVT = Op.getOperand(0).getValueType();
7631 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7632 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7633 unsigned Byte = Index * BytesPerElement;
7634 unsigned SubByte = Byte % ExtBytesPerElement;
7635 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7636 if (SubByte < MinSubByte ||
7637 SubByte + BytesPerElement > ExtBytesPerElement)
7638 break;
7639 // Get the byte offset of the unextended element
7640 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7641 // ...then add the byte offset relative to that element.
7642 Byte += SubByte - MinSubByte;
7643 if (Byte % BytesPerElement != 0)
7644 break;
7645 Op = Op.getOperand(0);
7646 Index = Byte / BytesPerElement;
7647 Force = true;
7648 } else
7649 break;
7650 }
7651 if (Force) {
7652 if (Op.getValueType() != VecVT) {
7653 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7654 DCI.AddToWorklist(Op.getNode());
7655 }
7656 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7657 DAG.getConstant(Index, DL, MVT::i32));
7658 }
7659 return SDValue();
7660}
7661
7662// Optimize vector operations in scalar value Op on the basis that Op
7663// is truncated to TruncVT.
7664SDValue SystemZTargetLowering::combineTruncateExtract(
7665 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7666 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7667 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7668 // of type TruncVT.
7669 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7670 TruncVT.getSizeInBits() % 8 == 0) {
7671 SDValue Vec = Op.getOperand(0);
7672 EVT VecVT = Vec.getValueType();
7673 if (canTreatAsByteVector(VecVT)) {
7674 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7675 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7676 unsigned TruncBytes = TruncVT.getStoreSize();
7677 if (BytesPerElement % TruncBytes == 0) {
7678 // Calculate the value of Y' in the above description. We are
7679 // splitting the original elements into Scale equal-sized pieces
7680 // and for truncation purposes want the last (least-significant)
7681 // of these pieces for IndexN. This is easiest to do by calculating
7682 // the start index of the following element and then subtracting 1.
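// For example, truncating (extract_vector_elt v2i64 X, 1) to i32 gives
// Scale == 2 and NewIndex == 3, i.e. element 3 of the v4i32 bitcast of X.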
7683 unsigned Scale = BytesPerElement / TruncBytes;
7684 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7685
7686 // Defer the creation of the bitcast from X to combineExtract,
7687 // which might be able to optimize the extraction.
7688 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7689 MVT::getIntegerVT(TruncBytes * 8),
7690 VecVT.getStoreSize() / TruncBytes);
7691 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7692 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7693 }
7694 }
7695 }
7696 }
7697 return SDValue();
7698}
7699
7700SDValue SystemZTargetLowering::combineZERO_EXTEND(
7701 SDNode *N, DAGCombinerInfo &DCI) const {
7702 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7703 SelectionDAG &DAG = DCI.DAG;
7704 SDValue N0 = N->getOperand(0);
7705 EVT VT = N->getValueType(0);
7706 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7707 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7708 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7709 if (TrueOp && FalseOp) {
7710 SDLoc DL(N0);
7711 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7712 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7713 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7714 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7715 // If N0 has multiple uses, change other uses as well.
7716 if (!N0.hasOneUse()) {
7717 SDValue TruncSelect =
7718 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7719 DCI.CombineTo(N0.getNode(), TruncSelect);
7720 }
7721 return NewSelect;
7722 }
7723 }
7724 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7725 // of the result is smaller than the size of X and all the truncated bits
7726 // of X are already zero.
7727 if (N0.getOpcode() == ISD::XOR &&
7728 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7729 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7730 N0.getOperand(1).getOpcode() == ISD::Constant) {
7731 SDValue X = N0.getOperand(0).getOperand(0);
7732 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7733 KnownBits Known = DAG.computeKnownBits(X);
7734 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7735 N0.getValueSizeInBits(),
7736 VT.getSizeInBits());
7737 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7738 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7739 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7740 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7741 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7742 }
7743 }
7744 }
7745 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7746 // and VECTOR ADD COMPUTE CARRY for i128:
7747 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7748 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7749 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7750 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7751 // For vector types, these patterns are recognized in the .td file.
7752 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7753 N0.getOperand(0).getValueType() == VT) {
7754 SDValue Op0 = N0.getOperand(0);
7755 SDValue Op1 = N0.getOperand(1);
7756 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7757 switch (CC) {
7758 case ISD::SETULE:
7759 std::swap(Op0, Op1);
7760 [[fallthrough]];
7761 case ISD::SETUGE:
7762 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7763 case ISD::SETUGT:
7764 std::swap(Op0, Op1);
7765 [[fallthrough]];
7766 case ISD::SETULT:
7767 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7768 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7769 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7770 Op0->getOperand(1));
7771 break;
7772 default:
7773 break;
7774 }
7775 }
7776
7777 return SDValue();
7778}
7779
7780SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7781 SDNode *N, DAGCombinerInfo &DCI) const {
7782 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7783 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7784 // into (select_cc LHS, RHS, -1, 0, COND)
7785 SelectionDAG &DAG = DCI.DAG;
7786 SDValue N0 = N->getOperand(0);
7787 EVT VT = N->getValueType(0);
7788 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7789 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7790 N0 = N0.getOperand(0);
7791 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7792 SDLoc DL(N0);
7793 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7794 DAG.getAllOnesConstant(DL, VT),
7795 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7796 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7797 }
7798 return SDValue();
7799}
7800
7801SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7802 SDNode *N, DAGCombinerInfo &DCI) const {
7803 // Convert (sext (ashr (shl X, C1), C2)) to
7804 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7805 // cheap as narrower ones.
7806 SelectionDAG &DAG = DCI.DAG;
7807 SDValue N0 = N->getOperand(0);
7808 EVT VT = N->getValueType(0);
7809 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7810 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7811 SDValue Inner = N0.getOperand(0);
7812 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7813 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7814 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7815 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7816 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7817 EVT ShiftVT = N0.getOperand(1).getValueType();
7818 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7819 Inner.getOperand(0));
7820 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7821 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7822 ShiftVT));
7823 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7824 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7825 }
7826 }
7827 }
7828
7829 return SDValue();
7830}
7831
7832SDValue SystemZTargetLowering::combineMERGE(
7833 SDNode *N, DAGCombinerInfo &DCI) const {
7834 SelectionDAG &DAG = DCI.DAG;
7835 unsigned Opcode = N->getOpcode();
7836 SDValue Op0 = N->getOperand(0);
7837 SDValue Op1 = N->getOperand(1);
7838 if (Op0.getOpcode() == ISD::BITCAST)
7839 Op0 = Op0.getOperand(0);
7840 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
7841 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7842 // for v4f32.
7843 if (Op1 == N->getOperand(0))
7844 return Op1;
7845 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7846 EVT VT = Op1.getValueType();
7847 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7848 if (ElemBytes <= 4) {
7849 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7850 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7851 EVT InVT = VT.changeVectorElementTypeToInteger();
7852 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7853 SystemZ::VectorBytes / ElemBytes / 2);
7854 if (VT != InVT) {
7855 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7856 DCI.AddToWorklist(Op1.getNode());
7857 }
7858 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7859 DCI.AddToWorklist(Op.getNode());
7860 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7861 }
7862 }
7863 return SDValue();
7864}
7865
7866static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7867 SDNode *&HiPart) {
7868 LoPart = HiPart = nullptr;
7869
7870 // Scan through all users.
7871 for (SDUse &Use : LD->uses()) {
7872 // Skip the uses of the chain.
7873 if (Use.getResNo() != 0)
7874 continue;
7875
7876 // Verify every user is a TRUNCATE to i64 of the low or high half.
7877 SDNode *User = Use.getUser();
7878 bool IsLoPart = true;
7879 if (User->getOpcode() == ISD::SRL &&
7880 User->getOperand(1).getOpcode() == ISD::Constant &&
7881 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7882 User = *User->user_begin();
7883 IsLoPart = false;
7884 }
7885 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7886 return false;
7887
7888 if (IsLoPart) {
7889 if (LoPart)
7890 return false;
7891 LoPart = User;
7892 } else {
7893 if (HiPart)
7894 return false;
7895 HiPart = User;
7896 }
7897 }
7898 return true;
7899}
7900
7901static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7902 SDNode *&HiPart) {
7903 LoPart = HiPart = nullptr;
7904
7905 // Scan through all users.
7906 for (SDUse &Use : LD->uses()) {
7907 // Skip the uses of the chain.
7908 if (Use.getResNo() != 0)
7909 continue;
7910
7911 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7912 SDNode *User = Use.getUser();
7913 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7914 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7915 return false;
7916
7917 switch (User->getConstantOperandVal(1)) {
7918 case SystemZ::subreg_l64:
7919 if (LoPart)
7920 return false;
7921 LoPart = User;
7922 break;
7923 case SystemZ::subreg_h64:
7924 if (HiPart)
7925 return false;
7926 HiPart = User;
7927 break;
7928 default:
7929 return false;
7930 }
7931 }
7932 return true;
7933}
7934
7935SDValue SystemZTargetLowering::combineLOAD(
7936 SDNode *N, DAGCombinerInfo &DCI) const {
7937 SelectionDAG &DAG = DCI.DAG;
7938 EVT LdVT = N->getValueType(0);
7939 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7940 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7941 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7942 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7943 if (PtrVT != LoadNodeVT) {
7944 SDLoc DL(LN);
7945 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7946 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7947 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7948 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7949 LN->getMemOperand());
7950 }
7951 }
7952 }
7953 SDLoc DL(N);
7954
7955 // Replace a 128-bit load that is used solely to move its value into GPRs
7956 // by separate loads of both halves.
7957 LoadSDNode *LD = cast<LoadSDNode>(N);
7958 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7959 SDNode *LoPart, *HiPart;
7960 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7961 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7962 // Rewrite each extraction as an independent load.
7963 SmallVector<SDValue, 2> ArgChains;
7964 if (HiPart) {
7965 SDValue EltLoad = DAG.getLoad(
7966 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7967 LD->getPointerInfo(), LD->getBaseAlign(),
7968 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7969
7970 DCI.CombineTo(HiPart, EltLoad, true);
7971 ArgChains.push_back(EltLoad.getValue(1));
7972 }
7973 if (LoPart) {
7974 SDValue EltLoad = DAG.getLoad(
7975 LoPart->getValueType(0), DL, LD->getChain(),
7976 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7977 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
7978 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7979
7980 DCI.CombineTo(LoPart, EltLoad, true);
7981 ArgChains.push_back(EltLoad.getValue(1));
7982 }
7983
7984 // Collect all chains via TokenFactor.
7985 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7986 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7987 DCI.AddToWorklist(Chain.getNode());
7988 return SDValue(N, 0);
7989 }
7990 }
7991
7992 if (LdVT.isVector() || LdVT.isInteger())
7993 return SDValue();
7994 // Transform a scalar load that is REPLICATEd as well as having other
7995 // use(s) to the form where the other use(s) use the first element of the
7996 // REPLICATE instead of the load. Otherwise instruction selection will not
7997 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
7998 // point loads.
7999
8000 SDValue Replicate;
8001 SmallVector<SDNode*, 8> OtherUses;
8002 for (SDUse &Use : N->uses()) {
8003 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
8004 if (Replicate)
8005 return SDValue(); // Should never happen
8006 Replicate = SDValue(Use.getUser(), 0);
8007 } else if (Use.getResNo() == 0)
8008 OtherUses.push_back(Use.getUser());
8009 }
8010 if (!Replicate || OtherUses.empty())
8011 return SDValue();
8012
8013 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
8014 Replicate, DAG.getConstant(0, DL, MVT::i32));
8015 // Update uses of the loaded Value while preserving old chains.
8016 for (SDNode *U : OtherUses) {
8017 SmallVector<SDValue, 8> Ops;
8018 for (SDValue Op : U->ops())
8019 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
8020 DAG.UpdateNodeOperands(U, Ops);
8021 }
8022 return SDValue(N, 0);
8023}
8024
8025bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
8026 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
8027 return true;
8028 if (Subtarget.hasVectorEnhancements2())
8029 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
8030 return true;
8031 return false;
8032}
8033
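// Return true if the shuffle mask M reverses the element order of a 128-bit
// vector of type VT whose elements are a whole number of bytes wide.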
8034 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
8035 if (!VT.isVector() || !VT.isSimple() ||
8036 VT.getSizeInBits() != 128 ||
8037 VT.getScalarSizeInBits() % 8 != 0)
8038 return false;
8039
8040 unsigned NumElts = VT.getVectorNumElements();
8041 for (unsigned i = 0; i < NumElts; ++i) {
8042 if (M[i] < 0) continue; // ignore UNDEF indices
8043 if ((unsigned) M[i] != NumElts - 1 - i)
8044 return false;
8045 }
8046
8047 return true;
8048}
8049
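// Return true if StoredVal is used only by stores of a round memory type of
// at most 16 bytes, possibly via splat BUILD_VECTORs that are themselves only
// used by such stores.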
8050static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
8051 for (auto *U : StoredVal->users()) {
8052 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
8053 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
8054 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
8055 continue;
8056 } else if (isa<BuildVectorSDNode>(U)) {
8057 SDValue BuildVector = SDValue(U, 0);
8058 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
8059 isOnlyUsedByStores(BuildVector, DAG))
8060 continue;
8061 }
8062 return false;
8063 }
8064 return true;
8065}
8066
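// Return true if Val is an i128 value assembled from two i64 halves as
// (or (zext Lo), (shl (anyext Hi), 64)), returning the halves in LoPart and
// HiPart.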
8067static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
8068 SDValue &HiPart) {
8069 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
8070 return false;
8071
8072 SDValue Op0 = Val.getOperand(0);
8073 SDValue Op1 = Val.getOperand(1);
8074
8075 if (Op0.getOpcode() == ISD::SHL)
8076 std::swap(Op0, Op1);
8077 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
8078 Op1.getOperand(1).getOpcode() != ISD::Constant ||
8079 Op1.getConstantOperandVal(1) != 64)
8080 return false;
8081 Op1 = Op1.getOperand(0);
8082
8083 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
8084 Op0.getOperand(0).getValueType() != MVT::i64)
8085 return false;
8086 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
8087 Op1.getOperand(0).getValueType() != MVT::i64)
8088 return false;
8089
8090 LoPart = Op0.getOperand(0);
8091 HiPart = Op1.getOperand(0);
8092 return true;
8093}
8094
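// Return true if Val is an f128 value built by a REG_SEQUENCE directly from
// two f64 values placed in subreg_l64 and subreg_h64, returning them in
// LoPart and HiPart.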
8095static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8096 SDValue &HiPart) {
8097 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8098 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8099 return false;
8100
8101 if (Val->getNumOperands() != 5 ||
8102 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8103 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8104 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8105 return false;
8106
8107 LoPart = Val->getOperand(1);
8108 HiPart = Val->getOperand(3);
8109 return true;
8110}
8111
8112SDValue SystemZTargetLowering::combineSTORE(
8113 SDNode *N, DAGCombinerInfo &DCI) const {
8114 SelectionDAG &DAG = DCI.DAG;
8115 auto *SN = cast<StoreSDNode>(N);
8116 auto &Op1 = N->getOperand(1);
8117 EVT MemVT = SN->getMemoryVT();
8118
8119 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8120 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8121 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8122 if (PtrVT != StoreNodeVT) {
8123 SDLoc DL(SN);
8124 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8125 SYSTEMZAS::PTR32, 0);
8126 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8127 SN->getPointerInfo(), SN->getBaseAlign(),
8128 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8129 }
8130 }
8131
8132 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8133 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8134 // If X has wider elements then convert it to:
8135 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8136 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8137 if (SDValue Value =
8138 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8139 DCI.AddToWorklist(Value.getNode());
8140
8141 // Rewrite the store with the new form of stored value.
8142 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8143 SN->getBasePtr(), SN->getMemoryVT(),
8144 SN->getMemOperand());
8145 }
8146 }
8147 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8148 if (!SN->isTruncatingStore() &&
8149 Op1.getOpcode() == ISD::BSWAP &&
8150 Op1.getNode()->hasOneUse() &&
8151 canLoadStoreByteSwapped(Op1.getValueType())) {
8152
8153 SDValue BSwapOp = Op1.getOperand(0);
8154
8155 if (BSwapOp.getValueType() == MVT::i16)
8156 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8157
8158 SDValue Ops[] = {
8159 N->getOperand(0), BSwapOp, N->getOperand(2)
8160 };
8161
8162 return
8163 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8164 Ops, MemVT, SN->getMemOperand());
8165 }
8166 // Combine STORE (element-swap) into VSTER
8167 if (!SN->isTruncatingStore() &&
8168 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8169 Op1.getNode()->hasOneUse() &&
8170 Subtarget.hasVectorEnhancements2()) {
8171 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8172 ArrayRef<int> ShuffleMask = SVN->getMask();
8173 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8174 SDValue Ops[] = {
8175 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8176 };
8177
8178 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8179 DAG.getVTList(MVT::Other),
8180 Ops, MemVT, SN->getMemOperand());
8181 }
8182 }
8183
8184 // Combine STORE (READCYCLECOUNTER) into STCKF.
8185 if (!SN->isTruncatingStore() &&
8186 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
8187 Op1.hasOneUse() &&
8188 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8189 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8190 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8191 DAG.getVTList(MVT::Other),
8192 Ops, MemVT, SN->getMemOperand());
8193 }
8194
8195 // Transform a store of a 128-bit value moved from parts into two stores.
8196 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8197 SDValue LoPart, HiPart;
8198 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8199 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8200 SDLoc DL(SN);
8201 SDValue Chain0 = DAG.getStore(
8202 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8203 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8204 SDValue Chain1 = DAG.getStore(
8205 SN->getChain(), DL, LoPart,
8206 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8207 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8208 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8209
8210 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8211 }
8212 }
8213
8214 // Replicate a reg or immediate with VREP instead of scalar multiply or
8215 // immediate load. It seems best to do this during the first DAGCombine as
8216 // it is straight-forward to handle the zero-extend node in the initial
8217 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
8218 // extracting an i16 element from a v16i8 vector).
8219 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8220 isOnlyUsedByStores(Op1, DAG)) {
8221 SDValue Word = SDValue();
8222 EVT WordVT;
8223
8224 // Find a replicated immediate and return it if found in Word and its
8225 // type in WordVT.
8226 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8227 // Some constants are better handled with a scalar store.
8228 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8229 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8230 return;
8231
8232 APInt Val = C->getAPIntValue();
8233 // Truncate Val in case of a truncating store.
8234 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8235 assert(SN->isTruncatingStore() &&
8236 "Non-truncating store and immediate value does not fit?");
8237 Val = Val.trunc(TotBytes * 8);
8238 }
8239
8240 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8241 if (VCI.isVectorConstantLegal(Subtarget) &&
8242 VCI.Opcode == SystemZISD::REPLICATE) {
8243 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8244 WordVT = VCI.VecVT.getScalarType();
8245 }
8246 };
8247
8248 // Find a replicated register and return it if found in Word and its type
8249 // in WordVT.
8250 auto FindReplicatedReg = [&](SDValue MulOp) {
8251 EVT MulVT = MulOp.getValueType();
8252 if (MulOp->getOpcode() == ISD::MUL &&
8253 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8254 // Find a zero extended value and its type.
8255 SDValue LHS = MulOp->getOperand(0);
8256 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8257 WordVT = LHS->getOperand(0).getValueType();
8258 else if (LHS->getOpcode() == ISD::AssertZext)
8259 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8260 else
8261 return;
8262 // Find a replicating constant, e.g. 0x00010001.
8263 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8264 SystemZVectorConstantInfo VCI(
8265 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8266 if (VCI.isVectorConstantLegal(Subtarget) &&
8267 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8268 WordVT == VCI.VecVT.getScalarType())
8269 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8270 }
8271 }
8272 };
8273
8274 if (isa<BuildVectorSDNode>(Op1) &&
8275 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8276 SDValue SplatVal = Op1->getOperand(0);
8277 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8278 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8279 else
8280 FindReplicatedReg(SplatVal);
8281 } else {
8282 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8283 FindReplicatedImm(C, MemVT.getStoreSize());
8284 else
8285 FindReplicatedReg(Op1);
8286 }
8287
8288 if (Word != SDValue()) {
8289 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8290 "Bad type handling");
8291 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8292 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8293 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8294 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8295 SN->getBasePtr(), SN->getMemOperand());
8296 }
8297 }
8298
8299 return SDValue();
8300}
8301
8302SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8303 SDNode *N, DAGCombinerInfo &DCI) const {
8304 SelectionDAG &DAG = DCI.DAG;
8305 // Combine element-swap (LOAD) into VLER
8306 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8307 N->getOperand(0).hasOneUse() &&
8308 Subtarget.hasVectorEnhancements2()) {
8309 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8310 ArrayRef<int> ShuffleMask = SVN->getMask();
8311 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8312 SDValue Load = N->getOperand(0);
8313 LoadSDNode *LD = cast<LoadSDNode>(Load);
8314
8315 // Create the element-swapping load.
8316 SDValue Ops[] = {
8317 LD->getChain(), // Chain
8318 LD->getBasePtr() // Ptr
8319 };
8320 SDValue ESLoad =
8321 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8322 DAG.getVTList(LD->getValueType(0), MVT::Other),
8323 Ops, LD->getMemoryVT(), LD->getMemOperand());
8324
8325 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8326 // by the load dead.
8327 DCI.CombineTo(N, ESLoad);
8328
8329 // Next, combine the load away; we give it a bogus result value but a real
8330 // chain result. The result value is dead because the shuffle is dead.
8331 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8332
8333 // Return N so it doesn't get rechecked!
8334 return SDValue(N, 0);
8335 }
8336 }
8337
8338 return SDValue();
8339}
8340
8341SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8342 SDNode *N, DAGCombinerInfo &DCI) const {
8343 SelectionDAG &DAG = DCI.DAG;
8344
8345 if (!Subtarget.hasVector())
8346 return SDValue();
8347
8348 // Look through bitcasts that retain the number of vector elements.
8349 SDValue Op = N->getOperand(0);
8350 if (Op.getOpcode() == ISD::BITCAST &&
8351 Op.getValueType().isVector() &&
8352 Op.getOperand(0).getValueType().isVector() &&
8353 Op.getValueType().getVectorNumElements() ==
8354 Op.getOperand(0).getValueType().getVectorNumElements())
8355 Op = Op.getOperand(0);
8356
8357 // Pull BSWAP out of a vector extraction.
8358 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8359 EVT VecVT = Op.getValueType();
8360 EVT EltVT = VecVT.getVectorElementType();
8361 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8362 Op.getOperand(0), N->getOperand(1));
8363 DCI.AddToWorklist(Op.getNode());
8364 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8365 if (EltVT != N->getValueType(0)) {
8366 DCI.AddToWorklist(Op.getNode());
8367 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8368 }
8369 return Op;
8370 }
8371
8372 // Try to simplify a vector extraction.
8373 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8374 SDValue Op0 = N->getOperand(0);
8375 EVT VecVT = Op0.getValueType();
8376 if (canTreatAsByteVector(VecVT))
8377 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8378 IndexN->getZExtValue(), DCI, false);
8379 }
8380 return SDValue();
8381}
8382
8383SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8384 SDNode *N, DAGCombinerInfo &DCI) const {
8385 SelectionDAG &DAG = DCI.DAG;
8386 // (join_dwords X, X) == (replicate X)
8387 if (N->getOperand(0) == N->getOperand(1))
8388 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8389 N->getOperand(0));
8390 return SDValue();
8391}
8392
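// Return a chain suitable as the common input chain for a node that merges
// the two strict FP nodes N1 and N2. Currently only the trivial case where
// both nodes already use the same chain is handled.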
8393 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
8394 SDValue Chain1 = N1->getOperand(0);
8395 SDValue Chain2 = N2->getOperand(0);
8396
8397 // Trivial case: both nodes take the same chain.
8398 if (Chain1 == Chain2)
8399 return Chain1;
8400
8401 // FIXME - we could handle more complex cases via TokenFactor,
8402 // assuming we can verify that this would not create a cycle.
8403 return SDValue();
8404}
8405
8406SDValue SystemZTargetLowering::combineFP_ROUND(
8407 SDNode *N, DAGCombinerInfo &DCI) const {
8408
8409 if (!Subtarget.hasVector())
8410 return SDValue();
8411
8412 // (fpround (extract_vector_elt X 0))
8413 // (fpround (extract_vector_elt X 1)) ->
8414 // (extract_vector_elt (VROUND X) 0)
8415 // (extract_vector_elt (VROUND X) 2)
8416 //
8417 // This is a special case since the target doesn't really support v2f32s.
8418 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8419 SelectionDAG &DAG = DCI.DAG;
8420 SDValue Op0 = N->getOperand(OpNo);
8421 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8422 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8423 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8424 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8425 Op0.getConstantOperandVal(1) == 0) {
8426 SDValue Vec = Op0.getOperand(0);
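// Scan the other users of the source vector for an extract of element 1
// whose single user is a matching fpround; both rounds can then be rewritten
// as extracts (elements 0 and 2) of a single (STRICT_)VROUND of the vector.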
8427 for (auto *U : Vec->users()) {
8428 if (U != Op0.getNode() && U->hasOneUse() &&
8429 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8430 U->getOperand(0) == Vec &&
8431 U->getOperand(1).getOpcode() == ISD::Constant &&
8432 U->getConstantOperandVal(1) == 1) {
8433 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8434 if (OtherRound.getOpcode() == N->getOpcode() &&
8435 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8436 OtherRound.getValueType() == MVT::f32) {
8437 SDValue VRound, Chain;
8438 if (N->isStrictFPOpcode()) {
8439 Chain = MergeInputChains(N, OtherRound.getNode());
8440 if (!Chain)
8441 continue;
8442 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8443 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8444 Chain = VRound.getValue(1);
8445 } else
8446 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8447 MVT::v4f32, Vec);
8448 DCI.AddToWorklist(VRound.getNode());
8449 SDValue Extract1 =
8450 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8451 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8452 DCI.AddToWorklist(Extract1.getNode());
8453 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8454 if (Chain)
8455 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8456 SDValue Extract0 =
8457 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8458 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8459 if (Chain)
8460 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8461 N->getVTList(), Extract0, Chain);
8462 return Extract0;
8463 }
8464 }
8465 }
8466 }
8467 return SDValue();
8468}
8469
8470SDValue SystemZTargetLowering::combineFP_EXTEND(
8471 SDNode *N, DAGCombinerInfo &DCI) const {
8472
8473 if (!Subtarget.hasVector())
8474 return SDValue();
8475
8476 // (fpextend (extract_vector_elt X 0))
8477 // (fpextend (extract_vector_elt X 2)) ->
8478 // (extract_vector_elt (VEXTEND X) 0)
8479 // (extract_vector_elt (VEXTEND X) 1)
8480 //
8481 // This is a special case since the target doesn't really support v2f32s.
8482 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8483 SelectionDAG &DAG = DCI.DAG;
8484 SDValue Op0 = N->getOperand(OpNo);
8485 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8486 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8487 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8488 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8489 Op0.getConstantOperandVal(1) == 0) {
8490 SDValue Vec = Op0.getOperand(0);
8491 for (auto *U : Vec->users()) {
8492 if (U != Op0.getNode() && U->hasOneUse() &&
8493 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8494 U->getOperand(0) == Vec &&
8495 U->getOperand(1).getOpcode() == ISD::Constant &&
8496 U->getConstantOperandVal(1) == 2) {
8497 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8498 if (OtherExtend.getOpcode() == N->getOpcode() &&
8499 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8500 OtherExtend.getValueType() == MVT::f64) {
8501 SDValue VExtend, Chain;
8502 if (N->isStrictFPOpcode()) {
8503 Chain = MergeInputChains(N, OtherExtend.getNode());
8504 if (!Chain)
8505 continue;
8506 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8507 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8508 Chain = VExtend.getValue(1);
8509 } else
8510 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8511 MVT::v2f64, Vec);
8512 DCI.AddToWorklist(VExtend.getNode());
8513 SDValue Extract1 =
8514 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8515 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8516 DCI.AddToWorklist(Extract1.getNode());
8517 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8518 if (Chain)
8519 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8520 SDValue Extract0 =
8521 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8522 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8523 if (Chain)
8524 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8525 N->getVTList(), Extract0, Chain);
8526 return Extract0;
8527 }
8528 }
8529 }
8530 }
8531 return SDValue();
8532}
8533
8534SDValue SystemZTargetLowering::combineINT_TO_FP(
8535 SDNode *N, DAGCombinerInfo &DCI) const {
8536 if (DCI.Level != BeforeLegalizeTypes)
8537 return SDValue();
8538 SelectionDAG &DAG = DCI.DAG;
8539 LLVMContext &Ctx = *DAG.getContext();
8540 unsigned Opcode = N->getOpcode();
8541 EVT OutVT = N->getValueType(0);
8542 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8543 SDValue Op = N->getOperand(0);
8544 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8545 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8546
8547 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8548 // v2f64 = uint_to_fp v2i16
8549 // =>
8550 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8551 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8552 OutScalarBits <= 64) {
8553 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8554 EVT ExtVT = EVT::getVectorVT(
8555 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8556 unsigned ExtOpcode =
8557 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
8558 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8559 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8560 }
8561 return SDValue();
8562}
8563
8564SDValue SystemZTargetLowering::combineFCOPYSIGN(
8565 SDNode *N, DAGCombinerInfo &DCI) const {
8566 SelectionDAG &DAG = DCI.DAG;
8567 EVT VT = N->getValueType(0);
8568 SDValue ValOp = N->getOperand(0);
8569 SDValue SignOp = N->getOperand(1);
8570
8571 // Remove the rounding, which is not needed: FCOPYSIGN only uses the sign bit of SignOp.
8572 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8573 SDValue WideOp = SignOp.getOperand(0);
8574 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8575 }
8576
8577 return SDValue();
8578}
8579
8580SDValue SystemZTargetLowering::combineBSWAP(
8581 SDNode *N, DAGCombinerInfo &DCI) const {
8582 SelectionDAG &DAG = DCI.DAG;
8583 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8584 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8585 N->getOperand(0).hasOneUse() &&
8586 canLoadStoreByteSwapped(N->getValueType(0))) {
8587 SDValue Load = N->getOperand(0);
8588 LoadSDNode *LD = cast<LoadSDNode>(Load);
8589
8590 // Create the byte-swapping load.
8591 SDValue Ops[] = {
8592 LD->getChain(), // Chain
8593 LD->getBasePtr() // Ptr
8594 };
8595 EVT LoadVT = N->getValueType(0);
8596 if (LoadVT == MVT::i16)
8597 LoadVT = MVT::i32;
8598 SDValue BSLoad =
8599 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8600 DAG.getVTList(LoadVT, MVT::Other),
8601 Ops, LD->getMemoryVT(), LD->getMemOperand());
8602
8603 // If this is an i16 load, insert the truncate.
8604 SDValue ResVal = BSLoad;
8605 if (N->getValueType(0) == MVT::i16)
8606 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8607
8608 // First, combine the bswap away. This makes the value produced by the
8609 // load dead.
8610 DCI.CombineTo(N, ResVal);
8611
8612 // Next, combine the load away; we give it a bogus result value but a real
8613 // chain result. The result value is dead because the bswap is dead.
8614 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8615
8616 // Return N so it doesn't get rechecked!
8617 return SDValue(N, 0);
8618 }
8619
8620 // Look through bitcasts that retain the number of vector elements.
8621 SDValue Op = N->getOperand(0);
8622 if (Op.getOpcode() == ISD::BITCAST &&
8623 Op.getValueType().isVector() &&
8624 Op.getOperand(0).getValueType().isVector() &&
8625 Op.getValueType().getVectorNumElements() ==
8626 Op.getOperand(0).getValueType().getVectorNumElements())
8627 Op = Op.getOperand(0);
8628
8629 // Push BSWAP into a vector insertion if at least one side then simplifies.
8630 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8631 SDValue Vec = Op.getOperand(0);
8632 SDValue Elt = Op.getOperand(1);
8633 SDValue Idx = Op.getOperand(2);
8634
8635 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
8636 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8637 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
8638 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8639 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8640 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8641 EVT VecVT = N->getValueType(0);
8642 EVT EltVT = N->getValueType(0).getVectorElementType();
8643 if (VecVT != Vec.getValueType()) {
8644 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8645 DCI.AddToWorklist(Vec.getNode());
8646 }
8647 if (EltVT != Elt.getValueType()) {
8648 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8649 DCI.AddToWorklist(Elt.getNode());
8650 }
8651 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8652 DCI.AddToWorklist(Vec.getNode());
8653 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8654 DCI.AddToWorklist(Elt.getNode());
8655 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8656 Vec, Elt, Idx);
8657 }
8658 }
8659
8660 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8661 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8662 if (SV && Op.hasOneUse()) {
8663 SDValue Op0 = Op.getOperand(0);
8664 SDValue Op1 = Op.getOperand(1);
8665
8666 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
8667 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8668 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
8669 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8670 EVT VecVT = N->getValueType(0);
8671 if (VecVT != Op0.getValueType()) {
8672 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8673 DCI.AddToWorklist(Op0.getNode());
8674 }
8675 if (VecVT != Op1.getValueType()) {
8676 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8677 DCI.AddToWorklist(Op1.getNode());
8678 }
8679 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8680 DCI.AddToWorklist(Op0.getNode());
8681 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8682 DCI.AddToWorklist(Op1.getNode());
8683 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8684 }
8685 }
8686
8687 return SDValue();
8688}
8689
8690SDValue SystemZTargetLowering::combineSETCC(
8691 SDNode *N, DAGCombinerInfo &DCI) const {
8692 SelectionDAG &DAG = DCI.DAG;
8693 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8694 const SDValue LHS = N->getOperand(0);
8695 const SDValue RHS = N->getOperand(1);
8696 bool CmpNull = isNullConstant(RHS);
8697 bool CmpAllOnes = isAllOnesConstant(RHS);
8698 EVT VT = N->getValueType(0);
8699 SDLoc DL(N);
8700
8701 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8702 // change the outer compare to an i128 compare. This will normally
8703 // allow the reduction to be recognized in adjustICmp128, and even if
8704 // not, the i128 compare will still generate better code.
8705 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8706 SDValue Src = peekThroughBitcasts(LHS);
8707 if (Src.getOpcode() == ISD::SETCC &&
8708 Src.getValueType().isFixedLengthVector() &&
8709 Src.getValueType().getScalarType() == MVT::i1) {
8710 EVT CmpVT = Src.getOperand(0).getValueType();
8711 if (CmpVT.getSizeInBits() == 128) {
8712 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8713 SDValue LHS =
8714 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8715 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8716 : DAG.getAllOnesConstant(DL, MVT::i128);
8717 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8718 N->getFlags());
8719 }
8720 }
8721 }
8722
8723 return SDValue();
8724}
8725
8726static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
8727 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8728 // set by the CCReg instruction using the CCValid / CCMask masks.
8729 // If the CCReg instruction is itself an ICMP testing the condition
8730 // code set by some other instruction, see whether we can directly
8731 // use that condition code.
8732
8733 // Verify that we have an ICMP against some constant.
8734 if (CCValid != SystemZ::CCMASK_ICMP)
8735 return false;
8736 auto *ICmp = CCReg.getNode();
8737 if (ICmp->getOpcode() != SystemZISD::ICMP)
8738 return false;
8739 auto *CompareLHS = ICmp->getOperand(0).getNode();
8740 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
8741 if (!CompareRHS)
8742 return false;
8743
8744 // Optimize the case where CompareLHS is a SELECT_CCMASK.
8745 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
8746 // Verify that we have an appropriate mask for an EQ or NE comparison.
8747 bool Invert = false;
8748 if (CCMask == SystemZ::CCMASK_CMP_NE)
8749 Invert = !Invert;
8750 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
8751 return false;
8752
8753 // Verify that the ICMP compares against one of the select values.
8754 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
8755 if (!TrueVal)
8756 return false;
8757 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
8758 if (!FalseVal)
8759 return false;
8760 if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue())
8761 Invert = !Invert;
8762 else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue())
8763 return false;
8764
8765 // Compute the effective CC mask for the new branch or select.
8766 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
8767 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
8768 if (!NewCCValid || !NewCCMask)
8769 return false;
8770 CCValid = NewCCValid->getZExtValue();
8771 CCMask = NewCCMask->getZExtValue();
8772 if (Invert)
8773 CCMask ^= CCValid;
8774
8775 // Return the updated CCReg link.
8776 CCReg = CompareLHS->getOperand(4);
8777 return true;
8778 }
8779
8780 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
8781 if (CompareLHS->getOpcode() == ISD::SRA) {
8782 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
8783 if (!SRACount || SRACount->getZExtValue() != 30)
8784 return false;
8785 auto *SHL = CompareLHS->getOperand(0).getNode();
8786 if (SHL->getOpcode() != ISD::SHL)
8787 return false;
8788 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
8789 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
8790 return false;
8791 auto *IPM = SHL->getOperand(0).getNode();
8792 if (IPM->getOpcode() != SystemZISD::IPM)
8793 return false;
8794
8795 // Avoid introducing CC spills (because SRA would clobber CC).
8796 if (!CompareLHS->hasOneUse())
8797 return false;
8798 // Verify that the ICMP compares against zero.
8799 if (CompareRHS->getZExtValue() != 0)
8800 return false;
8801
8802 // Compute the effective CC mask for the new branch or select.
8803 CCMask = SystemZ::reverseCCMask(CCMask);
8804
8805 // Return the updated CCReg link.
8806 CCReg = IPM->getOperand(0);
8807 return true;
8808 }
8809
8810 return false;
8811}
8812
8813SDValue SystemZTargetLowering::combineBR_CCMASK(
8814 SDNode *N, DAGCombinerInfo &DCI) const {
8815 SelectionDAG &DAG = DCI.DAG;
8816
8817 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8818 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8819 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8820 if (!CCValid || !CCMask)
8821 return SDValue();
8822
8823 int CCValidVal = CCValid->getZExtValue();
8824 int CCMaskVal = CCMask->getZExtValue();
8825 SDValue Chain = N->getOperand(0);
8826 SDValue CCReg = N->getOperand(4);
8827
8828 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
8829 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
8830 Chain,
8831 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8832 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8833 N->getOperand(3), CCReg);
8834 return SDValue();
8835}
8836
8837SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8838 SDNode *N, DAGCombinerInfo &DCI) const {
8839 SelectionDAG &DAG = DCI.DAG;
8840
8841 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8842 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8843 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8844 if (!CCValid || !CCMask)
8845 return SDValue();
8846
8847 int CCValidVal = CCValid->getZExtValue();
8848 int CCMaskVal = CCMask->getZExtValue();
8849 SDValue CCReg = N->getOperand(4);
8850
8851 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
8852 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
8853 N->getOperand(0), N->getOperand(1),
8854 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8855 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8856 CCReg);
8857 return SDValue();
8858}
8859
8860
8861SDValue SystemZTargetLowering::combineGET_CCMASK(
8862 SDNode *N, DAGCombinerInfo &DCI) const {
8863
8864 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
8865 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8866 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8867 if (!CCValid || !CCMask)
8868 return SDValue();
8869 int CCValidVal = CCValid->getZExtValue();
8870 int CCMaskVal = CCMask->getZExtValue();
8871
8872 SDValue Select = N->getOperand(0);
8873 if (Select->getOpcode() == ISD::TRUNCATE)
8874 Select = Select->getOperand(0);
8875 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
8876 return SDValue();
8877
8878 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
8879 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
8880 if (!SelectCCValid || !SelectCCMask)
8881 return SDValue();
8882 int SelectCCValidVal = SelectCCValid->getZExtValue();
8883 int SelectCCMaskVal = SelectCCMask->getZExtValue();
8884
8885 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
8886 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
8887 if (!TrueVal || !FalseVal)
8888 return SDValue();
8889 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
8890 ;
8891 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
8892 SelectCCMaskVal ^= SelectCCValidVal;
8893 else
8894 return SDValue();
8895
8896 if (SelectCCValidVal & ~CCValidVal)
8897 return SDValue();
8898 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
8899 return SDValue();
8900
8901 return Select->getOperand(4);
8902}
8903
8904SDValue SystemZTargetLowering::combineIntDIVREM(
8905 SDNode *N, DAGCombinerInfo &DCI) const {
8906 SelectionDAG &DAG = DCI.DAG;
8907 EVT VT = N->getValueType(0);
8908 // In the case where the divisor is a vector of constants a cheaper
8909 // sequence of instructions can replace the divide. BuildSDIV is called to
8910 // do this during DAG combining, but it only succeeds when it can build a
8911 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
8912 // since it is not Legal but Custom it can only happen before
8913 // legalization. Therefore we must scalarize this early, before Combine 1.
8914 // For widened vectors, this is already the result of type legalization.
8915 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
8916 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
8917 return DAG.UnrollVectorOp(N);
8918 return SDValue();
8919}
8920
8921
8922// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
8923// This is closely modeled after the common-code combineShiftToMULH.
8924SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
8925 SDNode *N, DAGCombinerInfo &DCI) const {
8926 SelectionDAG &DAG = DCI.DAG;
8927 SDLoc DL(N);
8928
8929 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8930 "SRL or SRA node is required here!");
8931
8932 if (!Subtarget.hasVector())
8933 return SDValue();
8934
8935 // Check the shift amount. Proceed with the transformation if the shift
8936 // amount is constant.
8937 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8938 if (!ShiftAmtSrc)
8939 return SDValue();
8940
8941 // The operation feeding into the shift must be an add.
8942 SDValue ShiftOperand = N->getOperand(0);
8943 if (ShiftOperand.getOpcode() != ISD::ADD)
8944 return SDValue();
8945
8946 // One operand of the add must be a multiply.
8947 SDValue MulOp = ShiftOperand.getOperand(0);
8948 SDValue AddOp = ShiftOperand.getOperand(1);
8949 if (MulOp.getOpcode() != ISD::MUL) {
8950 if (AddOp.getOpcode() != ISD::MUL)
8951 return SDValue();
8952 std::swap(MulOp, AddOp);
8953 }
8954
8955 // All operands must be equivalent extend nodes.
8956 SDValue LeftOp = MulOp.getOperand(0);
8957 SDValue RightOp = MulOp.getOperand(1);
8958
8959 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8960 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8961
8962 if (!IsSignExt && !IsZeroExt)
8963 return SDValue();
8964
8965 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8966 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8967
8968 SDValue MulhRightOp;
8969 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
8970 unsigned ActiveBits = IsSignExt
8971 ? Constant->getAPIntValue().getSignificantBits()
8972 : Constant->getAPIntValue().getActiveBits();
8973 if (ActiveBits > NarrowVTSize)
8974 return SDValue();
8975 MulhRightOp = DAG.getConstant(
8976 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
8977 NarrowVT);
8978 } else {
8979 if (LeftOp.getOpcode() != RightOp.getOpcode())
8980 return SDValue();
8981 // Check that the two extend nodes are the same type.
8982 if (NarrowVT != RightOp.getOperand(0).getValueType())
8983 return SDValue();
8984 MulhRightOp = RightOp.getOperand(0);
8985 }
8986
8987 SDValue MulhAddOp;
8988 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
8989 unsigned ActiveBits = IsSignExt
8990 ? Constant->getAPIntValue().getSignificantBits()
8991 : Constant->getAPIntValue().getActiveBits();
8992 if (ActiveBits > NarrowVTSize)
8993 return SDValue();
8994 MulhAddOp = DAG.getConstant(
8995 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
8996 NarrowVT);
8997 } else {
8998 if (LeftOp.getOpcode() != AddOp.getOpcode())
8999 return SDValue();
9000 // Check that the two extend nodes are the same type.
9001 if (NarrowVT != AddOp.getOperand(0).getValueType())
9002 return SDValue();
9003 MulhAddOp = AddOp.getOperand(0);
9004 }
9005
9006 EVT WideVT = LeftOp.getValueType();
9007 // Proceed with the transformation if the wide types match.
9008 assert((WideVT == RightOp.getValueType()) &&
9009 "Cannot have a multiply node with two different operand types.");
9010 assert((WideVT == AddOp.getValueType()) &&
9011 "Cannot have an add node with two different operand types.");
9012
9013 // Proceed with the transformation if the wide type is twice as large
9014 // as the narrow type.
9015 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9016 return SDValue();
9017
9018 // Check the shift amount with the narrow type size.
9019 // Proceed with the transformation if the shift amount is the width
9020 // of the narrow type.
9021 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9022 if (ShiftAmt != NarrowVTSize)
9023 return SDValue();
9024
9025 // Proceed if we support the multiply-and-add-high operation.
9026 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9027 NarrowVT == MVT::v4i32 ||
9028 (Subtarget.hasVectorEnhancements3() &&
9029 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9030 return SDValue();
9031
9032 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9033 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9034 DL, NarrowVT, LeftOp.getOperand(0),
9035 MulhRightOp, MulhAddOp);
9036 bool IsSigned = N->getOpcode() == ISD::SRA;
9037 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9038}
9039
9040// Op is an operand of a multiplication. Check whether this can be folded
9041// into an even/odd widening operation; if so, return the opcode to be used
9042// and update Op to the appropriate sub-operand. Note that the caller must
9043// verify that *both* operands of the multiplication support the operation.
9044 static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG,
9045 const SystemZSubtarget &Subtarget,
9046 SDValue &Op) {
9047 EVT VT = Op.getValueType();
9048
9049 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9050 // to selecting the even or odd vector elements.
9051 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9052 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9053 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9054 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9055 unsigned NumElts = VT.getVectorNumElements();
9056 Op = Op.getOperand(0);
9057 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9058 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9059 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9060 ArrayRef<int> ShuffleMask = SVN->getMask();
9061 bool CanUseEven = true, CanUseOdd = true;
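// Check whether the shuffle selects only even source elements (0, 2, ...)
// or only odd source elements (1, 3, ...); undefined lanes are compatible
// with either choice.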
9062 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9063 if (ShuffleMask[Elt] == -1)
9064 continue;
9065 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9066 CanUseEven = false;
9067 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9068 CanUseOdd = false;
9069 }
9070 Op = Op.getOperand(0);
9071 if (CanUseEven)
9072 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9073 if (CanUseOdd)
9074 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9075 }
9076 }
9077
9078 // For z17, we can also support the v2i64->i128 case, which looks like
9079 // (sign/zero_extend (extract_vector_elt X 0/1))
9080 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9081 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9082 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9083 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9084 Op = Op.getOperand(0);
9085 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9086 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9087 Op.getOperand(1).getOpcode() == ISD::Constant) {
9088 unsigned Elem = Op.getConstantOperandVal(1);
9089 Op = Op.getOperand(0);
9090 if (Elem == 0)
9091 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9092 if (Elem == 1)
9093 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9094 }
9095 }
9096
9097 return 0;
9098}
9099
9100SDValue SystemZTargetLowering::combineMUL(
9101 SDNode *N, DAGCombinerInfo &DCI) const {
9102 SelectionDAG &DAG = DCI.DAG;
9103
9104 // Detect even/odd widening multiplication.
9105 SDValue Op0 = N->getOperand(0);
9106 SDValue Op1 = N->getOperand(1);
9107 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9108 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9109 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9110 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9111
9112 return SDValue();
9113}
9114
9115SDValue SystemZTargetLowering::combineINTRINSIC(
9116 SDNode *N, DAGCombinerInfo &DCI) const {
9117 SelectionDAG &DAG = DCI.DAG;
9118
9119 unsigned Id = N->getConstantOperandVal(1);
9120 switch (Id) {
9121 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9122 // or larger is simply a vector load.
9123 case Intrinsic::s390_vll:
9124 case Intrinsic::s390_vlrl:
9125 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9126 if (C->getZExtValue() >= 15)
9127 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9128 N->getOperand(3), MachinePointerInfo());
9129 break;
9130 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9131 case Intrinsic::s390_vstl:
9132 case Intrinsic::s390_vstrl:
9133 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9134 if (C->getZExtValue() >= 15)
9135 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9136 N->getOperand(4), MachinePointerInfo());
9137 break;
9138 }
9139
9140 return SDValue();
9141}
9142
9143SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9144 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9145 return N->getOperand(0);
9146 return N;
9147}
9148
9149 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
9150 DAGCombinerInfo &DCI) const {
9151 switch(N->getOpcode()) {
9152 default: break;
9153 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9154 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9155 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9156 case SystemZISD::MERGE_HIGH:
9157 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9158 case ISD::LOAD: return combineLOAD(N, DCI);
9159 case ISD::STORE: return combineSTORE(N, DCI);
9160 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9161 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9162 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9163 case ISD::STRICT_FP_ROUND:
9164 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9165 case ISD::STRICT_FP_EXTEND:
9166 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9167 case ISD::SINT_TO_FP:
9168 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9169 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9170 case ISD::BSWAP: return combineBSWAP(N, DCI);
9171 case ISD::SETCC: return combineSETCC(N, DCI);
9172 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9173 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9174 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9175 case ISD::SRL:
9176 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9177 case ISD::MUL: return combineMUL(N, DCI);
9178 case ISD::SDIV:
9179 case ISD::UDIV:
9180 case ISD::SREM:
9181 case ISD::UREM: return combineIntDIVREM(N, DCI);
9182 case ISD::INTRINSIC_W_CHAIN:
9183 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9184 }
9185
9186 return SDValue();
9187}
9188
9189// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9190// are for Op.
9191static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9192 unsigned OpNo) {
9193 EVT VT = Op.getValueType();
9194 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9195 APInt SrcDemE;
9196 unsigned Opcode = Op.getOpcode();
9197 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9198 unsigned Id = Op.getConstantOperandVal(0);
9199 switch (Id) {
9200 case Intrinsic::s390_vpksh: // PACKS
9201 case Intrinsic::s390_vpksf:
9202 case Intrinsic::s390_vpksg:
9203 case Intrinsic::s390_vpkshs: // PACKS_CC
9204 case Intrinsic::s390_vpksfs:
9205 case Intrinsic::s390_vpksgs:
9206 case Intrinsic::s390_vpklsh: // PACKLS
9207 case Intrinsic::s390_vpklsf:
9208 case Intrinsic::s390_vpklsg:
9209 case Intrinsic::s390_vpklshs: // PACKLS_CC
9210 case Intrinsic::s390_vpklsfs:
9211 case Intrinsic::s390_vpklsgs:
9212 // VECTOR PACK truncates the elements of two source vectors into one.
9213 SrcDemE = DemandedElts;
9214 if (OpNo == 2)
9215 SrcDemE.lshrInPlace(NumElts / 2);
9216 SrcDemE = SrcDemE.trunc(NumElts / 2);
9217 break;
9218 // VECTOR UNPACK extends half the elements of the source vector.
9219 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9220 case Intrinsic::s390_vuphh:
9221 case Intrinsic::s390_vuphf:
9222 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9223 case Intrinsic::s390_vuplhh:
9224 case Intrinsic::s390_vuplhf:
9225 SrcDemE = APInt(NumElts * 2, 0);
9226 SrcDemE.insertBits(DemandedElts, 0);
9227 break;
9228 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9229 case Intrinsic::s390_vuplhw:
9230 case Intrinsic::s390_vuplf:
9231 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9232 case Intrinsic::s390_vupllh:
9233 case Intrinsic::s390_vupllf:
9234 SrcDemE = APInt(NumElts * 2, 0);
9235 SrcDemE.insertBits(DemandedElts, NumElts);
9236 break;
9237 case Intrinsic::s390_vpdi: {
9238 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9239 SrcDemE = APInt(NumElts, 0);
9240 if (!DemandedElts[OpNo - 1])
9241 break;
9242 unsigned Mask = Op.getConstantOperandVal(3);
9243 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9244 // Demand input element 0 or 1, given by the mask bit value.
9245 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9246 break;
9247 }
9248 case Intrinsic::s390_vsldb: {
9249 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9250 assert(VT == MVT::v16i8 && "Unexpected type.");
9251 unsigned FirstIdx = Op.getConstantOperandVal(3);
9252 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9253 unsigned NumSrc0Els = 16 - FirstIdx;
9254 SrcDemE = APInt(NumElts, 0);
9255 if (OpNo == 1) {
9256 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9257 SrcDemE.insertBits(DemEls, FirstIdx);
9258 } else {
9259 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9260 SrcDemE.insertBits(DemEls, 0);
9261 }
9262 break;
9263 }
9264 case Intrinsic::s390_vperm:
9265 SrcDemE = APInt::getAllOnes(NumElts);
9266 break;
9267 default:
9268 llvm_unreachable("Unhandled intrinsic.");
9269 break;
9270 }
9271 } else {
9272 switch (Opcode) {
9273 case SystemZISD::JOIN_DWORDS:
9274 // Scalar operand.
9275 SrcDemE = APInt(1, 1);
9276 break;
9277 case SystemZISD::SELECT_CCMASK:
9278 SrcDemE = DemandedElts;
9279 break;
9280 default:
9281 llvm_unreachable("Unhandled opcode.");
9282 break;
9283 }
9284 }
9285 return SrcDemE;
9286}
9287
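// Helper for computeKnownBitsForTargetNode: for a node with two vector
// source operands, intersect the known bits of both sources, using the
// per-operand demanded elements from getDemandedSrcElements.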
9288static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9289 const APInt &DemandedElts,
9290 const SelectionDAG &DAG, unsigned Depth,
9291 unsigned OpNo) {
9292 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9293 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9294 KnownBits LHSKnown =
9295 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9296 KnownBits RHSKnown =
9297 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9298 Known = LHSKnown.intersectWith(RHSKnown);
9299}
9300
9301void
9302 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
9303 KnownBits &Known,
9304 const APInt &DemandedElts,
9305 const SelectionDAG &DAG,
9306 unsigned Depth) const {
9307 Known.resetAll();
9308
9309 // Intrinsic CC result is returned in the two low bits.
9310 unsigned Tmp0, Tmp1; // not used
9311 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9312 Known.Zero.setBitsFrom(2);
9313 return;
9314 }
9315 EVT VT = Op.getValueType();
9316 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9317 return;
9318 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9319 "KnownBits does not match VT in bitwidth");
9320 assert ((!VT.isVector() ||
9321 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9322 "DemandedElts does not match VT number of elements");
9323 unsigned BitWidth = Known.getBitWidth();
9324 unsigned Opcode = Op.getOpcode();
9325 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9326 bool IsLogical = false;
9327 unsigned Id = Op.getConstantOperandVal(0);
9328 switch (Id) {
9329 case Intrinsic::s390_vpksh: // PACKS
9330 case Intrinsic::s390_vpksf:
9331 case Intrinsic::s390_vpksg:
9332 case Intrinsic::s390_vpkshs: // PACKS_CC
9333 case Intrinsic::s390_vpksfs:
9334 case Intrinsic::s390_vpksgs:
9335 case Intrinsic::s390_vpklsh: // PACKLS
9336 case Intrinsic::s390_vpklsf:
9337 case Intrinsic::s390_vpklsg:
9338 case Intrinsic::s390_vpklshs: // PACKLS_CC
9339 case Intrinsic::s390_vpklsfs:
9340 case Intrinsic::s390_vpklsgs:
9341 case Intrinsic::s390_vpdi:
9342 case Intrinsic::s390_vsldb:
9343 case Intrinsic::s390_vperm:
9344 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9345 break;
9346 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9347 case Intrinsic::s390_vuplhh:
9348 case Intrinsic::s390_vuplhf:
9349 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9350 case Intrinsic::s390_vupllh:
9351 case Intrinsic::s390_vupllf:
9352 IsLogical = true;
9353 [[fallthrough]];
9354 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9355 case Intrinsic::s390_vuphh:
9356 case Intrinsic::s390_vuphf:
9357 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9358 case Intrinsic::s390_vuplhw:
9359 case Intrinsic::s390_vuplf: {
9360 SDValue SrcOp = Op.getOperand(1);
9361 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9362 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9363 if (IsLogical) {
9364 Known = Known.zext(BitWidth);
9365 } else
9366 Known = Known.sext(BitWidth);
9367 break;
9368 }
9369 default:
9370 break;
9371 }
9372 } else {
9373 switch (Opcode) {
9374 case SystemZISD::JOIN_DWORDS:
9375 case SystemZISD::SELECT_CCMASK:
9376 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9377 break;
9378 case SystemZISD::REPLICATE: {
9379 SDValue SrcOp = Op.getOperand(0);
9380 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
9381 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
9382 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9383 break;
9384 }
9385 default:
9386 break;
9387 }
9388 }
9389
9390 // Known has the width of the source operand(s). Adjust if needed to match
9391 // the passed bitwidth.
9392 if (Known.getBitWidth() != BitWidth)
9393 Known = Known.anyextOrTrunc(BitWidth);
9394}
9395
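// Helper for ComputeNumSignBitsForTargetNode: take the minimum sign-bit
// count of the two source operands and, for PACK-style nodes whose result
// elements are narrower than the source elements, account for the bits
// removed by the truncation.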
9396static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9397 const SelectionDAG &DAG, unsigned Depth,
9398 unsigned OpNo) {
9399 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9400 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9401 if (LHS == 1) return 1; // Early out.
9402 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9403 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9404 if (RHS == 1) return 1; // Early out.
9405 unsigned Common = std::min(LHS, RHS);
9406 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9407 EVT VT = Op.getValueType();
9408 unsigned VTBits = VT.getScalarSizeInBits();
9409 if (SrcBitWidth > VTBits) { // PACK
9410 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9411 if (Common > SrcExtraBits)
9412 return (Common - SrcExtraBits);
9413 return 1;
9414 }
9415 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9416 return Common;
9417}
9418
9419unsigned
9420 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
9421 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9422 unsigned Depth) const {
9423 if (Op.getResNo() != 0)
9424 return 1;
9425 unsigned Opcode = Op.getOpcode();
9426 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9427 unsigned Id = Op.getConstantOperandVal(0);
9428 switch (Id) {
9429 case Intrinsic::s390_vpksh: // PACKS
9430 case Intrinsic::s390_vpksf:
9431 case Intrinsic::s390_vpksg:
9432 case Intrinsic::s390_vpkshs: // PACKS_CC
9433 case Intrinsic::s390_vpksfs:
9434 case Intrinsic::s390_vpksgs:
9435 case Intrinsic::s390_vpklsh: // PACKLS
9436 case Intrinsic::s390_vpklsf:
9437 case Intrinsic::s390_vpklsg:
9438 case Intrinsic::s390_vpklshs: // PACKLS_CC
9439 case Intrinsic::s390_vpklsfs:
9440 case Intrinsic::s390_vpklsgs:
9441 case Intrinsic::s390_vpdi:
9442 case Intrinsic::s390_vsldb:
9443 case Intrinsic::s390_vperm:
9444 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
9445 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9446 case Intrinsic::s390_vuphh:
9447 case Intrinsic::s390_vuphf:
9448 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9449 case Intrinsic::s390_vuplhw:
9450 case Intrinsic::s390_vuplf: {
9451 SDValue PackedOp = Op.getOperand(1);
9452 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9453 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9454 EVT VT = Op.getValueType();
9455 unsigned VTBits = VT.getScalarSizeInBits();
9456 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9457 return Tmp;
9458 }
9459 default:
9460 break;
9461 }
9462 } else {
9463 switch (Opcode) {
9464 case SystemZISD::SELECT_CCMASK:
9465 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9466 default:
9467 break;
9468 }
9469 }
9470
9471 return 1;
9472}
9473
9474 bool
9475 SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
9476 const APInt &DemandedElts, const SelectionDAG &DAG,
9477 bool PoisonOnly, unsigned Depth) const {
9478 switch (Op->getOpcode()) {
9479 case SystemZISD::PCREL_WRAPPER:
9480 case SystemZISD::PCREL_OFFSET:
9481 return true;
9482 }
9483 return false;
9484}
9485
9486unsigned
9487 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
9488 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9489 unsigned StackAlign = TFI->getStackAlignment();
9490 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9491 "Unexpected stack alignment");
9492 // The default stack probe size is 4096 if the function has no
9493 // stack-probe-size attribute.
9494 unsigned StackProbeSize =
9495 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9496 // Round down to the stack alignment.
9497 StackProbeSize &= ~(StackAlign - 1);
9498 return StackProbeSize ? StackProbeSize : StackAlign;
9499}
9500
9501//===----------------------------------------------------------------------===//
9502// Custom insertion
9503//===----------------------------------------------------------------------===//
9504
9505// Force base value Base into a register before MI. Return the register.
9506 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
9507 const SystemZInstrInfo *TII) {
9508 MachineBasicBlock *MBB = MI.getParent();
9509 MachineFunction &MF = *MBB->getParent();
9510 MachineRegisterInfo &MRI = MF.getRegInfo();
9511
9512 if (Base.isReg()) {
9513 // Copy Base into a new virtual register to help register coalescing in
9514 // cases with multiple uses.
9515 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9516 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9517 .add(Base);
9518 return Reg;
9519 }
9520
9521 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9522 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9523 .add(Base)
9524 .addImm(0)
9525 .addReg(0);
9526 return Reg;
9527}
9528
9529// The CC operand of MI might be missing a kill marker because there
9530// were multiple uses of CC, and ISel didn't know which to mark.
9531// Figure out whether MI should have had a kill marker.
9532 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
9533 // Scan forward through BB for a use/def of CC.
9534 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
9535 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9536 const MachineInstr &MI = *miI;
9537 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9538 return false;
9539 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9540 break; // Should have kill-flag - update below.
9541 }
9542
9543 // If we hit the end of the block, check whether CC is live into a
9544 // successor.
9545 if (miI == MBB->end()) {
9546 for (const MachineBasicBlock *Succ : MBB->successors())
9547 if (Succ->isLiveIn(SystemZ::CC))
9548 return false;
9549 }
9550
9551 return true;
9552}
9553
9554// Return true if it is OK for this Select pseudo-opcode to be cascaded
9555// together with other Select pseudo-opcodes into a single basic-block with
9556// a conditional jump around it.
9557 static bool isSelectPseudo(MachineInstr &MI) {
9558 switch (MI.getOpcode()) {
9559 case SystemZ::Select32:
9560 case SystemZ::Select64:
9561 case SystemZ::Select128:
9562 case SystemZ::SelectF32:
9563 case SystemZ::SelectF64:
9564 case SystemZ::SelectF128:
9565 case SystemZ::SelectVR32:
9566 case SystemZ::SelectVR64:
9567 case SystemZ::SelectVR128:
9568 return true;
9569
9570 default:
9571 return false;
9572 }
9573}
9574
9575// Helper function, which inserts PHI functions into SinkMBB:
9576// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9577// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
9578 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
9579 MachineBasicBlock *TrueMBB,
9580 MachineBasicBlock *FalseMBB,
9581 MachineBasicBlock *SinkMBB) {
9582 MachineFunction *MF = TrueMBB->getParent();
9583 const SystemZInstrInfo *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo();
9584
9585 MachineInstr *FirstMI = Selects.front();
9586 unsigned CCValid = FirstMI->getOperand(3).getImm();
9587 unsigned CCMask = FirstMI->getOperand(4).getImm();
9588
9589 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9590
9591 // As we are creating the PHIs, we have to be careful if there is more than
9592 // one. Later Selects may reference the results of earlier Selects, but later
9593 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9594 // That also means that PHI construction must work forward from earlier to
9595 // later, and that the code must maintain a mapping from each earlier PHI's
9596 // destination register to the registers that went into that PHI.
9597 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
9598
9599 for (auto *MI : Selects) {
9600 Register DestReg = MI->getOperand(0).getReg();
9601 Register TrueReg = MI->getOperand(1).getReg();
9602 Register FalseReg = MI->getOperand(2).getReg();
9603
9604 // If this Select we are generating is the opposite condition from
9605 // the jump we generated, then we have to swap the operands for the
9606 // PHI that is going to be generated.
9607 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9608 std::swap(TrueReg, FalseReg);
9609
9610 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9611 TrueReg = It->second.first;
9612
9613 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9614 FalseReg = It->second.second;
9615
9616 DebugLoc DL = MI->getDebugLoc();
9617 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9618 .addReg(TrueReg).addMBB(TrueMBB)
9619 .addReg(FalseReg).addMBB(FalseMBB);
9620
9621 // Add this PHI to the rewrite table.
9622 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9623 }
9624
9625 MF->getProperties().resetNoPHIs();
9626}
9627
9628 MachineBasicBlock *
9629 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9630 MachineBasicBlock *BB) const {
9631 MachineFunction &MF = *BB->getParent();
9632 MachineFrameInfo &MFI = MF.getFrameInfo();
9633 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9634 assert(TFL->hasReservedCallFrame(MF) &&
9635 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9636 (void)TFL;
9637 // Get the MaxCallFrameSize value and erase MI since it serves no further
9638 // purpose as the call frame is statically reserved in the prolog. Set
9639 // AdjustsStack as MI is *not* mapped as a frame instruction.
9640 uint32_t NumBytes = MI.getOperand(0).getImm();
9641 if (NumBytes > MFI.getMaxCallFrameSize())
9642 MFI.setMaxCallFrameSize(NumBytes);
9643 MFI.setAdjustsStack(true);
9644
9645 MI.eraseFromParent();
9646 return BB;
9647}
9648
9649// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
9650 MachineBasicBlock *
9651 SystemZTargetLowering::emitSelect(MachineInstr &MI,
9652 MachineBasicBlock *MBB) const {
9653 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9654 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9655
9656 unsigned CCValid = MI.getOperand(3).getImm();
9657 unsigned CCMask = MI.getOperand(4).getImm();
9658
9659 // If we have a sequence of Select* pseudo instructions using the
9660 // same condition code value, we want to expand all of them into
9661 // a single pair of basic blocks using the same condition.
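// Scan ahead for further Select pseudos that use the same CC value, stopping
// once CC is redefined, another custom-inserted instruction appears, a
// non-debug user of an earlier Select result is seen, or a small instruction
// budget is exceeded.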
9662 SmallVector<MachineInstr*, 8> Selects;
9663 SmallVector<MachineInstr*, 8> DbgValues;
9664 Selects.push_back(&MI);
9665 unsigned Count = 0;
9666 for (MachineInstr &NextMI : llvm::make_range(
9667 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9668 if (isSelectPseudo(NextMI)) {
9669 assert(NextMI.getOperand(3).getImm() == CCValid &&
9670 "Bad CCValid operands since CC was not redefined.");
9671 if (NextMI.getOperand(4).getImm() == CCMask ||
9672 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9673 Selects.push_back(&NextMI);
9674 continue;
9675 }
9676 break;
9677 }
9678 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9679 NextMI.usesCustomInsertionHook())
9680 break;
9681 bool User = false;
9682 for (auto *SelMI : Selects)
9683 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9684 User = true;
9685 break;
9686 }
9687 if (NextMI.isDebugInstr()) {
9688 if (User) {
9689 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9690 DbgValues.push_back(&NextMI);
9691 }
9692 } else if (User || ++Count > 20)
9693 break;
9694 }
9695
9696 MachineInstr *LastMI = Selects.back();
9697 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9698 checkCCKill(*LastMI, MBB));
9699 MachineBasicBlock *StartMBB = MBB;
9700 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9701 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9702
9703 // Unless CC was killed in the last Select instruction, mark it as
9704 // live-in to both FalseMBB and JoinMBB.
9705 if (!CCKilled) {
9706 FalseMBB->addLiveIn(SystemZ::CC);
9707 JoinMBB->addLiveIn(SystemZ::CC);
9708 }
9709
9710 // StartMBB:
9711 // BRC CCMask, JoinMBB
9712 // # fallthrough to FalseMBB
9713 MBB = StartMBB;
9714 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9715 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9716 MBB->addSuccessor(JoinMBB);
9717 MBB->addSuccessor(FalseMBB);
9718
9719 // FalseMBB:
9720 // # fallthrough to JoinMBB
9721 MBB = FalseMBB;
9722 MBB->addSuccessor(JoinMBB);
9723
9724 // JoinMBB:
9725 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9726 // ...
9727 MBB = JoinMBB;
9728 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
9729 for (auto *SelMI : Selects)
9730 SelMI->eraseFromParent();
9731
9732 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
9733 for (auto *DbgMI : DbgValues)
9734 MBB->splice(InsertPos, StartMBB, DbgMI);
9735
9736 return JoinMBB;
9737}
9738
9739// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9740// StoreOpcode is the store to use and Invert says whether the store should
9741// happen when the condition is false rather than true. If a STORE ON
9742// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9743MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9744 MachineBasicBlock *MBB,
9745 unsigned StoreOpcode,
9746 unsigned STOCOpcode,
9747 bool Invert) const {
9748 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9749
9750 Register SrcReg = MI.getOperand(0).getReg();
9751 MachineOperand Base = MI.getOperand(1);
9752 int64_t Disp = MI.getOperand(2).getImm();
9753 Register IndexReg = MI.getOperand(3).getReg();
9754 unsigned CCValid = MI.getOperand(4).getImm();
9755 unsigned CCMask = MI.getOperand(5).getImm();
9756 DebugLoc DL = MI.getDebugLoc();
9757
9758 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
9759
9760 // ISel pattern matching also adds a load memory operand of the same
9761 // address, so take special care to find the storing memory operand.
9762 MachineMemOperand *MMO = nullptr;
9763 for (auto *I : MI.memoperands())
9764 if (I->isStore()) {
9765 MMO = I;
9766 break;
9767 }
9768
9769 // Use STOCOpcode if possible. We could use different store patterns in
9770 // order to avoid matching the index register, but the performance trade-offs
9771 // might be more complicated in that case.
9772 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
9773 if (Invert)
9774 CCMask ^= CCValid;
9775
9776 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
9777 .addReg(SrcReg)
9778 .add(Base)
9779 .addImm(Disp)
9780 .addImm(CCValid)
9781 .addImm(CCMask)
9782 .addMemOperand(MMO);
9783
9784 MI.eraseFromParent();
9785 return MBB;
9786 }
9787
9788 // Get the condition needed to branch around the store.
9789 if (!Invert)
9790 CCMask ^= CCValid;
9791
9792 MachineBasicBlock *StartMBB = MBB;
9793 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
9794 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9795
9796 // Unless CC was killed in the CondStore instruction, mark it as
9797 // live-in to both FalseMBB and JoinMBB.
9798 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
9799 !checkCCKill(MI, JoinMBB)) {
9800 FalseMBB->addLiveIn(SystemZ::CC);
9801 JoinMBB->addLiveIn(SystemZ::CC);
9802 }
9803
9804 // StartMBB:
9805 // BRC CCMask, JoinMBB
9806 // # fallthrough to FalseMBB
9807 MBB = StartMBB;
9808 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9809 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9810 MBB->addSuccessor(JoinMBB);
9811 MBB->addSuccessor(FalseMBB);
9812
9813 // FalseMBB:
9814 // store %SrcReg, %Disp(%Index,%Base)
9815 // # fallthrough to JoinMBB
9816 MBB = FalseMBB;
9817 BuildMI(MBB, DL, TII->get(StoreOpcode))
9818 .addReg(SrcReg)
9819 .add(Base)
9820 .addImm(Disp)
9821 .addReg(IndexReg)
9822 .addMemOperand(MMO);
9823 MBB->addSuccessor(JoinMBB);
9824
9825 MI.eraseFromParent();
9826 return JoinMBB;
9827}
9828
9829// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
9830 MachineBasicBlock *
9831 SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
9832 MachineBasicBlock *MBB,
9833 bool Unsigned) const {
9834 MachineFunction &MF = *MBB->getParent();
9835 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9836 MachineRegisterInfo &MRI = MF.getRegInfo();
9837
9838 // Synthetic instruction to compare 128-bit values.
9839 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
9840 Register Op0 = MI.getOperand(0).getReg();
9841 Register Op1 = MI.getOperand(1).getReg();
9842
9843 MachineBasicBlock *StartMBB = MBB;
9844 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
9845 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
9846
9847 // StartMBB:
9848 //
9849 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
9850 // Swap the inputs to get:
9851 // CC 1 if high(Op0) > high(Op1)
9852 // CC 2 if high(Op0) < high(Op1)
9853 // CC 0 if high(Op0) == high(Op1)
9854 //
9855 // If CC != 0, we're done, so jump over the next instruction.
9856 //
9857 // VEC[L]G Op1, Op0
9858 // JNE JoinMBB
9859 // # fallthrough to HiEqMBB
9860 MBB = StartMBB;
9861 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
9862 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
9863 .addReg(Op1).addReg(Op0);
9864 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9865 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
9866 MBB->addSuccessor(JoinMBB);
9867 MBB->addSuccessor(HiEqMBB);
9868
9869 // HiEqMBB:
9870 //
9871 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
9872 // Since we already know the high parts are equal, the CC
9873 // result will only depend on the low parts:
9874 // CC 1 if low(Op0) > low(Op1)
9875 // CC 3 if low(Op0) <= low(Op1)
9876 //
9877 // VCHLGS Tmp, Op0, Op1
9878 // # fallthrough to JoinMBB
9879 MBB = HiEqMBB;
9880 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
9881 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
9882 .addReg(Op0).addReg(Op1);
9883 MBB->addSuccessor(JoinMBB);
9884
9885 // Mark CC as live-in to JoinMBB.
9886 JoinMBB->addLiveIn(SystemZ::CC);
9887
9888 MI.eraseFromParent();
9889 return JoinMBB;
9890}
9891
9892// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
9893// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
9894// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
9895// whether the field should be inverted after performing BinOpcode (e.g. for
9896// NAND).
9897MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
9898 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
9899 bool Invert) const {
9900 MachineFunction &MF = *MBB->getParent();
9901 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9902 MachineRegisterInfo &MRI = MF.getRegInfo();
9903
9904 // Extract the operands. Base can be a register or a frame index.
9905 // Src2 can be a register or immediate.
9906 Register Dest = MI.getOperand(0).getReg();
9907 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
9908 int64_t Disp = MI.getOperand(2).getImm();
9909 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
9910 Register BitShift = MI.getOperand(4).getReg();
9911 Register NegBitShift = MI.getOperand(5).getReg();
9912 unsigned BitSize = MI.getOperand(6).getImm();
9913 DebugLoc DL = MI.getDebugLoc();
9914
9915 // Get the right opcodes for the displacement.
9916 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
9917 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
9918 assert(LOpcode && CSOpcode && "Displacement out of range");
9919
9920 // Create virtual registers for temporary results.
9921 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9922 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9923 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9924 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9925 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9926
9927 // Insert a basic block for the main loop.
9928 MachineBasicBlock *StartMBB = MBB;
9929 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9930 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9931
9932 // StartMBB:
9933 // ...
9934 // %OrigVal = L Disp(%Base)
9935 // # fall through to LoopMBB
9936 MBB = StartMBB;
9937 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
9938 MBB->addSuccessor(LoopMBB);
9939
9940 // LoopMBB:
9941 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
9942 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
9943 // %RotatedNewVal = OP %RotatedOldVal, %Src2
9944 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
9945 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
9946 // JNE LoopMBB
9947 // # fall through to DoneMBB
9948 MBB = LoopMBB;
9949 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
9950 .addReg(OrigVal).addMBB(StartMBB)
9951 .addReg(Dest).addMBB(LoopMBB);
9952 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
9953 .addReg(OldVal).addReg(BitShift).addImm(0);
9954 if (Invert) {
9955 // Perform the operation normally and then invert every bit of the field.
9956 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9957 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
9958 // XILF with the upper BitSize bits set.
9959 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
9960 .addReg(Tmp).addImm(-1U << (32 - BitSize));
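// (Worked example: BitSize == 8 gives an immediate of -1U << 24 == 0xff000000 and
// BitSize == 16 gives 0xffff0000, so only the subword field, which sits in the
// most-significant BitSize bits of the rotated word, is inverted.)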
9961 } else if (BinOpcode)
9962 // A simple binary operation.
9963 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
9964 .addReg(RotatedOldVal)
9965 .add(Src2);
9966 else
9967 // Use RISBG to rotate Src2 into position and use it to replace the
9968 // field in RotatedOldVal.
9969 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
9970 .addReg(RotatedOldVal).addReg(Src2.getReg())
9971 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
9972 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
9973 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
9974 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
9975 .addReg(OldVal)
9976 .addReg(NewVal)
9977 .add(Base)
9978 .addImm(Disp);
9979 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9980 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
9981 MBB->addSuccessor(LoopMBB);
9982 MBB->addSuccessor(DoneMBB);
9983
9984 MI.eraseFromParent();
9985 return DoneMBB;
9986}
9987
9988// Implement EmitInstrWithCustomInserter for subword pseudo
9989// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
9990// instruction that should be used to compare the current field with the
9991// minimum or maximum value. KeepOldMask is the BRC condition-code mask
9992// for when the current field should be kept.
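// For example, ATOMIC_LOADW_MIN is emitted with CompareOpcode == CR and
// KeepOldMask == CCMASK_CMP_LE (keep the old field when it is already <= Src2),
// and ATOMIC_LOADW_UMAX uses CLR with CCMASK_CMP_GE.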
9993MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
9994 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
9995 unsigned KeepOldMask) const {
9996 MachineFunction &MF = *MBB->getParent();
9997 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9998 MachineRegisterInfo &MRI = MF.getRegInfo();
9999
10000 // Extract the operands. Base can be a register or a frame index.
10001 Register Dest = MI.getOperand(0).getReg();
10002 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10003 int64_t Disp = MI.getOperand(2).getImm();
10004 Register Src2 = MI.getOperand(3).getReg();
10005 Register BitShift = MI.getOperand(4).getReg();
10006 Register NegBitShift = MI.getOperand(5).getReg();
10007 unsigned BitSize = MI.getOperand(6).getImm();
10008 DebugLoc DL = MI.getDebugLoc();
10009
10010 // Get the right opcodes for the displacement.
10011 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10012 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10013 assert(LOpcode && CSOpcode && "Displacement out of range");
10014
10015 // Create virtual registers for temporary results.
10016 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10017 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10018 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10019 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10020 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10021 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10022
10023 // Insert 3 basic blocks for the loop.
10024 MachineBasicBlock *StartMBB = MBB;
10025 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10026 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10027 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10028 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10029
10030 // StartMBB:
10031 // ...
10032 // %OrigVal = L Disp(%Base)
10033 // # fall through to LoopMBB
10034 MBB = StartMBB;
10035 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10036 MBB->addSuccessor(LoopMBB);
10037
10038 // LoopMBB:
10039 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10040 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10041 // CompareOpcode %RotatedOldVal, %Src2
10042 // BRC KeepOldMask, UpdateMBB
10043 MBB = LoopMBB;
10044 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10045 .addReg(OrigVal).addMBB(StartMBB)
10046 .addReg(Dest).addMBB(UpdateMBB);
10047 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10048 .addReg(OldVal).addReg(BitShift).addImm(0);
10049 BuildMI(MBB, DL, TII->get(CompareOpcode))
10050 .addReg(RotatedOldVal).addReg(Src2);
10051 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10052 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10053 MBB->addSuccessor(UpdateMBB);
10054 MBB->addSuccessor(UseAltMBB);
10055
10056 // UseAltMBB:
10057 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10058 // # fall through to UpdateMBB
10059 MBB = UseAltMBB;
10060 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10061 .addReg(RotatedOldVal).addReg(Src2)
10062 .addImm(32).addImm(31 + BitSize).addImm(0);
10063 MBB->addSuccessor(UpdateMBB);
10064
10065 // UpdateMBB:
10066 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10067 // [ %RotatedAltVal, UseAltMBB ]
10068 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10069 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10070 // JNE LoopMBB
10071 // # fall through to DoneMBB
10072 MBB = UpdateMBB;
10073 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10074 .addReg(RotatedOldVal).addMBB(LoopMBB)
10075 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10076 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10077 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10078 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10079 .addReg(OldVal)
10080 .addReg(NewVal)
10081 .add(Base)
10082 .addImm(Disp);
10083 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10084 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
10085 MBB->addSuccessor(LoopMBB);
10086 MBB->addSuccessor(DoneMBB);
10087
10088 MI.eraseFromParent();
10089 return DoneMBB;
10090}
10091
10092// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10093// instruction MI.
10094MachineBasicBlock *
10095SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10096 MachineBasicBlock *MBB) const {
10097 MachineFunction &MF = *MBB->getParent();
10098 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10099 MachineRegisterInfo &MRI = MF.getRegInfo();
10100
10101 // Extract the operands. Base can be a register or a frame index.
10102 Register Dest = MI.getOperand(0).getReg();
10103 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10104 int64_t Disp = MI.getOperand(2).getImm();
10105 Register CmpVal = MI.getOperand(3).getReg();
10106 Register OrigSwapVal = MI.getOperand(4).getReg();
10107 Register BitShift = MI.getOperand(5).getReg();
10108 Register NegBitShift = MI.getOperand(6).getReg();
10109 int64_t BitSize = MI.getOperand(7).getImm();
10110 DebugLoc DL = MI.getDebugLoc();
10111
10112 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10113
10114 // Get the right opcodes for the displacement and zero-extension.
10115 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10116 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10117 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10118 assert(LOpcode && CSOpcode && "Displacement out of range");
10119
10120 // Create virtual registers for temporary results.
10121 Register OrigOldVal = MRI.createVirtualRegister(RC);
10122 Register OldVal = MRI.createVirtualRegister(RC);
10123 Register SwapVal = MRI.createVirtualRegister(RC);
10124 Register StoreVal = MRI.createVirtualRegister(RC);
10125 Register OldValRot = MRI.createVirtualRegister(RC);
10126 Register RetryOldVal = MRI.createVirtualRegister(RC);
10127 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10128
10129 // Insert 2 basic blocks for the loop.
10130 MachineBasicBlock *StartMBB = MBB;
10131 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10132 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10133 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10134
10135 // StartMBB:
10136 // ...
10137 // %OrigOldVal = L Disp(%Base)
10138 // # fall through to LoopMBB
10139 MBB = StartMBB;
10140 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10141 .add(Base)
10142 .addImm(Disp)
10143 .addReg(0);
10144 MBB->addSuccessor(LoopMBB);
10145
10146 // LoopMBB:
10147 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10148 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10149 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10150 // ^^ The low BitSize bits contain the field
10151 // of interest.
10152 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10153 // ^^ Replace the upper 32-BitSize bits of the
10154 // swap value with those that we loaded and rotated.
10155 // %Dest = LL[CH] %OldValRot
10156 // CR %Dest, %CmpVal
10157 // JNE DoneMBB
10158 // # Fall through to SetMBB
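// (Illustrative walk-through for BitSize == 8: the RLL leaves the loaded byte in
// the low 8 bits of %OldValRot, LLCR zero-extends it into %Dest for the CR against
// %CmpVal, and the RISBG32 forms %RetrySwapVal by replacing the upper 24 bits of
// %SwapVal with those of %OldValRot, so only the byte to be stored differs.)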
10159 MBB = LoopMBB;
10160 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10161 .addReg(OrigOldVal).addMBB(StartMBB)
10162 .addReg(RetryOldVal).addMBB(SetMBB);
10163 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10164 .addReg(OrigSwapVal).addMBB(StartMBB)
10165 .addReg(RetrySwapVal).addMBB(SetMBB);
10166 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10167 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10168 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10169 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10170 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10171 .addReg(OldValRot);
10172 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10173 .addReg(Dest).addReg(CmpVal);
10174 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10175 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10176 .addMBB(DoneMBB);
10177 MBB->addSuccessor(DoneMBB);
10178 MBB->addSuccessor(SetMBB);
10179
10180 // SetMBB:
10181 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10182 // ^^ Rotate the new field to its proper position.
10183 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10184 // JNE LoopMBB
10185 // # fall through to ExitMBB
10186 MBB = SetMBB;
10187 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10188 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10189 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10190 .addReg(OldVal)
10191 .addReg(StoreVal)
10192 .add(Base)
10193 .addImm(Disp);
10194 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10195 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
10196 MBB->addSuccessor(LoopMBB);
10197 MBB->addSuccessor(DoneMBB);
10198
10199 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10200 // to the block after the loop. At this point, CC may have been defined
10201 // either by the CR in LoopMBB or by the CS in SetMBB.
10202 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10203 DoneMBB->addLiveIn(SystemZ::CC);
10204
10205 MI.eraseFromParent();
10206 return DoneMBB;
10207}
10208
10209// Emit a move from two GR64s to a GR128.
10210MachineBasicBlock *
10211SystemZTargetLowering::emitPair128(MachineInstr &MI,
10212 MachineBasicBlock *MBB) const {
10213 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10214 const DebugLoc &DL = MI.getDebugLoc();
10215
10216 Register Dest = MI.getOperand(0).getReg();
10217 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10218 .add(MI.getOperand(1))
10219 .addImm(SystemZ::subreg_h64)
10220 .add(MI.getOperand(2))
10221 .addImm(SystemZ::subreg_l64);
10222 MI.eraseFromParent();
10223 return MBB;
10224}
10225
10226// Emit an extension from a GR64 to a GR128. ClearEven is true
10227// if the high register of the GR128 value must be cleared or false if
10228// it's "don't care".
10229MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10230 MachineBasicBlock *MBB,
10231 bool ClearEven) const {
10232 MachineFunction &MF = *MBB->getParent();
10233 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10234 MachineRegisterInfo &MRI = MF.getRegInfo();
10235 DebugLoc DL = MI.getDebugLoc();
10236
10237 Register Dest = MI.getOperand(0).getReg();
10238 Register Src = MI.getOperand(1).getReg();
10239 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10240
10241 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10242 if (ClearEven) {
10243 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10244 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10245
10246 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10247 .addImm(0);
10248 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10249 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10250 In128 = NewIn128;
10251 }
10252 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10253 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10254
10255 MI.eraseFromParent();
10256 return MBB;
10257}
10258
10259MachineBasicBlock *
10260SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10261 MachineBasicBlock *MBB,
10262 unsigned Opcode, bool IsMemset) const {
10263 MachineFunction &MF = *MBB->getParent();
10264 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10265 MachineRegisterInfo &MRI = MF.getRegInfo();
10266 DebugLoc DL = MI.getDebugLoc();
10267
10268 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10269 uint64_t DestDisp = MI.getOperand(1).getImm();
10270 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10271 uint64_t SrcDisp;
10272
10273 // Fold the displacement Disp if it is out of range.
10274 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10275 if (!isUInt<12>(Disp)) {
10276 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10277 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10278 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10279 .add(Base).addImm(Disp).addReg(0);
10280 Base = MachineOperand::CreateReg(Reg, false);
10281 Disp = 0;
10282 }
10283 };
10284
10285 if (!IsMemset) {
10286 SrcBase = earlyUseOperand(MI.getOperand(2));
10287 SrcDisp = MI.getOperand(3).getImm();
10288 } else {
10289 SrcBase = DestBase;
10290 SrcDisp = DestDisp++;
10291 foldDisplIfNeeded(DestBase, DestDisp);
10292 }
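// For memset the destination doubles as the source: the fill byte is first stored
// at SrcDisp (the original DestDisp), and the overlapping MVC that follows copies
// from offset N to offset N+1 under the same base, so the single stored byte is
// propagated through the rest of the destination one byte at a time.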
10293
10294 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10295 bool IsImmForm = LengthMO.isImm();
10296 bool IsRegForm = !IsImmForm;
10297
10298 // Build and insert one Opcode of Length, with special treatment for memset.
10299 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10300 MachineBasicBlock::iterator InsPos,
10301 MachineOperand DBase, uint64_t DDisp,
10302 MachineOperand SBase, uint64_t SDisp,
10303 unsigned Length) -> void {
10304 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10305 if (IsMemset) {
10306 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10307 if (ByteMO.isImm())
10308 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10309 .add(SBase).addImm(SDisp).add(ByteMO);
10310 else
10311 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10312 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10313 if (--Length == 0)
10314 return;
10315 }
10316 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10317 .add(DBase).addImm(DDisp).addImm(Length)
10318 .add(SBase).addImm(SDisp)
10319 .setMemRefs(MI.memoperands());
10320 };
10321
10322 bool NeedsLoop = false;
10323 uint64_t ImmLength = 0;
10324 Register LenAdjReg = SystemZ::NoRegister;
10325 if (IsImmForm) {
10326 ImmLength = LengthMO.getImm();
10327 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10328 if (ImmLength == 0) {
10329 MI.eraseFromParent();
10330 return MBB;
10331 }
10332 if (Opcode == SystemZ::CLC) {
10333 if (ImmLength > 3 * 256)
10334 // A two-CLC sequence is a clear win over a loop, not least because
10335 // it needs only one branch. A three-CLC sequence needs the same
10336 // number of branches as a loop (i.e. 2), but is shorter. That
10337 // brings us to lengths greater than 768 bytes. It seems relatively
10338 // likely that a difference will be found within the first 768 bytes,
10339 // so we just optimize for the smallest number of branch
10340 // instructions, in order to avoid polluting the prediction buffer
10341 // too much.
10342 NeedsLoop = true;
10343 } else if (ImmLength > 6 * 256)
10344 // The heuristic we use is to prefer loops for anything that would
10345 // require 7 or more MVCs. With these kinds of sizes there isn't much
10346 // to choose between straight-line code and looping code, since the
10347 // time will be dominated by the MVCs themselves.
10348 NeedsLoop = true;
10349 } else {
10350 NeedsLoop = true;
10351 LenAdjReg = LengthMO.getReg();
10352 }
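// In concrete terms: an MVC-lowered copy of up to 1536 bytes (6 * 256) is emitted
// as straight-line MVCs and anything longer becomes a loop, while for CLC the
// straight-line limit is 768 bytes (3 * 256).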
10353
10354 // When generating more than one CLC, all but the last will need to
10355 // branch to the end when a difference is found.
10356 MachineBasicBlock *EndMBB =
10357 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10358 ? SystemZ::splitBlockAfter(MI, MBB)
10359 : nullptr);
10360
10361 if (NeedsLoop) {
10362 Register StartCountReg =
10363 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10364 if (IsImmForm) {
10365 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10366 ImmLength &= 255;
10367 } else {
10368 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10369 .addReg(LenAdjReg)
10370 .addReg(0)
10371 .addImm(8);
10372 }
10373
10374 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10375 auto loadZeroAddress = [&]() -> MachineOperand {
10376 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10377 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10378 return MachineOperand::CreateReg(Reg, false);
10379 };
10380 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10381 DestBase = loadZeroAddress();
10382 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10383 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10384
10385 MachineBasicBlock *StartMBB = nullptr;
10386 MachineBasicBlock *LoopMBB = nullptr;
10387 MachineBasicBlock *NextMBB = nullptr;
10388 MachineBasicBlock *DoneMBB = nullptr;
10389 MachineBasicBlock *AllDoneMBB = nullptr;
10390
10391 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10392 Register StartDestReg =
10393 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10394
10395 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10396 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10397 Register ThisDestReg =
10398 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10399 Register NextSrcReg = MRI.createVirtualRegister(RC);
10400 Register NextDestReg =
10401 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10402 RC = &SystemZ::GR64BitRegClass;
10403 Register ThisCountReg = MRI.createVirtualRegister(RC);
10404 Register NextCountReg = MRI.createVirtualRegister(RC);
10405
10406 if (IsRegForm) {
10407 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10408 StartMBB = SystemZ::emitBlockAfter(MBB);
10409 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10410 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10411 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10412
10413 // MBB:
10414 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10415 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10416 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10417 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10418 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10419 .addMBB(AllDoneMBB);
10420 MBB->addSuccessor(AllDoneMBB);
10421 if (!IsMemset)
10422 MBB->addSuccessor(StartMBB);
10423 else {
10424 // MemsetOneCheckMBB:
10425 // # Jump to MemsetOneMBB for a memset of length 1, or
10426 // # fall thru to StartMBB.
10427 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10428 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10429 MBB->addSuccessor(MemsetOneCheckMBB);
10430 MBB = MemsetOneCheckMBB;
10431 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10432 .addReg(LenAdjReg).addImm(-1);
10433 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10434 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10435 .addMBB(MemsetOneMBB);
10436 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10437 MBB->addSuccessor(StartMBB, {90, 100});
10438
10439 // MemsetOneMBB:
10440 // # Jump back to AllDoneMBB after a single MVI or STC.
10441 MBB = MemsetOneMBB;
10442 insertMemMemOp(MBB, MBB->end(),
10443 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10444 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10445 1);
10446 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10447 MBB->addSuccessor(AllDoneMBB);
10448 }
10449
10450 // StartMBB:
10451 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10452 MBB = StartMBB;
10453 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10454 .addReg(StartCountReg).addImm(0);
10455 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10456 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10457 .addMBB(DoneMBB);
10458 MBB->addSuccessor(DoneMBB);
10459 MBB->addSuccessor(LoopMBB);
10460 }
10461 else {
10462 StartMBB = MBB;
10463 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10464 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10465 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10466
10467 // StartMBB:
10468 // # fall through to LoopMBB
10469 MBB->addSuccessor(LoopMBB);
10470
10471 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10472 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10473 if (EndMBB && !ImmLength)
10474 // If the loop handled the whole CLC range, DoneMBB will be empty with
10475 // CC live-through into EndMBB, so add it as live-in.
10476 DoneMBB->addLiveIn(SystemZ::CC);
10477 }
10478
10479 // LoopMBB:
10480 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10481 // [ %NextDestReg, NextMBB ]
10482 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10483 // [ %NextSrcReg, NextMBB ]
10484 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10485 // [ %NextCountReg, NextMBB ]
10486 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10487 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10488 // ( JLH EndMBB )
10489 //
10490 // The prefetch is used only for MVC. The JLH is used only for CLC.
10491 MBB = LoopMBB;
10492 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10493 .addReg(StartDestReg).addMBB(StartMBB)
10494 .addReg(NextDestReg).addMBB(NextMBB);
10495 if (!HaveSingleBase)
10496 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10497 .addReg(StartSrcReg).addMBB(StartMBB)
10498 .addReg(NextSrcReg).addMBB(NextMBB);
10499 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10500 .addReg(StartCountReg).addMBB(StartMBB)
10501 .addReg(NextCountReg).addMBB(NextMBB);
10502 if (Opcode == SystemZ::MVC)
10503 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10504 .addImm(SystemZ::PFD_WRITE)
10505 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10506 insertMemMemOp(MBB, MBB->end(),
10507 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10508 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10509 if (EndMBB) {
10510 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10511 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10512 .addMBB(EndMBB);
10513 MBB->addSuccessor(EndMBB);
10514 MBB->addSuccessor(NextMBB);
10515 }
10516
10517 // NextMBB:
10518 // %NextDestReg = LA 256(%ThisDestReg)
10519 // %NextSrcReg = LA 256(%ThisSrcReg)
10520 // %NextCountReg = AGHI %ThisCountReg, -1
10521 // CGHI %NextCountReg, 0
10522 // JLH LoopMBB
10523 // # fall through to DoneMBB
10524 //
10525 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10526 MBB = NextMBB;
10527 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10528 .addReg(ThisDestReg).addImm(256).addReg(0);
10529 if (!HaveSingleBase)
10530 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10531 .addReg(ThisSrcReg).addImm(256).addReg(0);
10532 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10533 .addReg(ThisCountReg).addImm(-1);
10534 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10535 .addReg(NextCountReg).addImm(0);
10536 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10537 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10538 .addMBB(LoopMBB);
10539 MBB->addSuccessor(LoopMBB);
10540 MBB->addSuccessor(DoneMBB);
10541
10542 MBB = DoneMBB;
10543 if (IsRegForm) {
10544 // DoneMBB:
10545 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10546 // # Use EXecute Relative Long for the remainder of the bytes. The target
10547 // instruction of the EXRL will have a length field of 1 since 0 is an
10548 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10549 // 0xff) + 1.
10550 // # Fall through to AllDoneMBB.
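// Worked example (register form, plain MVC): for a 260-byte copy LenAdjReg holds
// 259, so StartCountReg = 259 >> 8 = 1 and the loop moves one 256-byte chunk; the
// EXRL then executes an MVC covering (259 & 0xff) + 1 = 4 remaining bytes.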
10551 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10552 Register RemDestReg = HaveSingleBase ? RemSrcReg
10553 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10554 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10555 .addReg(StartDestReg).addMBB(StartMBB)
10556 .addReg(NextDestReg).addMBB(NextMBB);
10557 if (!HaveSingleBase)
10558 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10559 .addReg(StartSrcReg).addMBB(StartMBB)
10560 .addReg(NextSrcReg).addMBB(NextMBB);
10561 if (IsMemset)
10562 insertMemMemOp(MBB, MBB->end(),
10563 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10564 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10565 MachineInstrBuilder EXRL_MIB =
10566 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10567 .addImm(Opcode)
10568 .addReg(LenAdjReg)
10569 .addReg(RemDestReg).addImm(DestDisp)
10570 .addReg(RemSrcReg).addImm(SrcDisp);
10571 MBB->addSuccessor(AllDoneMBB);
10572 MBB = AllDoneMBB;
10573 if (Opcode != SystemZ::MVC) {
10574 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10575 if (EndMBB)
10576 MBB->addLiveIn(SystemZ::CC);
10577 }
10578 }
10579 MF.getProperties().resetNoPHIs();
10580 }
10581
10582 // Handle any remaining bytes with straight-line code.
10583 while (ImmLength > 0) {
10584 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10585 // The previous iteration might have created out-of-range displacements.
10586 // Apply them using LA/LAY if so.
10587 foldDisplIfNeeded(DestBase, DestDisp);
10588 foldDisplIfNeeded(SrcBase, SrcDisp);
10589 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10590 DestDisp += ThisLength;
10591 SrcDisp += ThisLength;
10592 ImmLength -= ThisLength;
10593 // If there's another CLC to go, branch to the end if a difference
10594 // was found.
10595 if (EndMBB && ImmLength > 0) {
10596 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10597 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10598 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10599 .addMBB(EndMBB);
10600 MBB->addSuccessor(EndMBB);
10601 MBB->addSuccessor(NextMBB);
10602 MBB = NextMBB;
10603 }
10604 }
10605 if (EndMBB) {
10606 MBB->addSuccessor(EndMBB);
10607 MBB = EndMBB;
10608 MBB->addLiveIn(SystemZ::CC);
10609 }
10610
10611 MI.eraseFromParent();
10612 return MBB;
10613}
10614
10615// Decompose string pseudo-instruction MI into a loop that continually performs
10616// Opcode until CC != 3.
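// For example, CLSTLoop repeatedly issues CLST: the instruction sets CC 3 when it
// stops at a CPU-determined point with the comparison still incomplete, so the
// loop simply re-executes it from the updated addresses until CC != 3.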
10617MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10618 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10619 MachineFunction &MF = *MBB->getParent();
10620 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10621 MachineRegisterInfo &MRI = MF.getRegInfo();
10622 DebugLoc DL = MI.getDebugLoc();
10623
10624 uint64_t End1Reg = MI.getOperand(0).getReg();
10625 uint64_t Start1Reg = MI.getOperand(1).getReg();
10626 uint64_t Start2Reg = MI.getOperand(2).getReg();
10627 uint64_t CharReg = MI.getOperand(3).getReg();
10628
10629 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10630 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10631 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10632 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10633
10634 MachineBasicBlock *StartMBB = MBB;
10635 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10636 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10637
10638 // StartMBB:
10639 // # fall through to LoopMBB
10640 MBB->addSuccessor(LoopMBB);
10641
10642 // LoopMBB:
10643 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10644 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10645 // R0L = %CharReg
10646 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10647 // JO LoopMBB
10648 // # fall through to DoneMBB
10649 //
10650 // The load of R0L can be hoisted by post-RA LICM.
10651 MBB = LoopMBB;
10652
10653 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10654 .addReg(Start1Reg).addMBB(StartMBB)
10655 .addReg(End1Reg).addMBB(LoopMBB);
10656 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10657 .addReg(Start2Reg).addMBB(StartMBB)
10658 .addReg(End2Reg).addMBB(LoopMBB);
10659 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10660 BuildMI(MBB, DL, TII->get(Opcode))
10661 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10662 .addReg(This1Reg).addReg(This2Reg);
10663 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10664 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
10665 MBB->addSuccessor(LoopMBB);
10666 MBB->addSuccessor(DoneMBB);
10667
10668 DoneMBB->addLiveIn(SystemZ::CC);
10669
10670 MI.eraseFromParent();
10671 return DoneMBB;
10672}
10673
10674// Update TBEGIN instruction with final opcode and register clobbers.
10675MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10676 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10677 bool NoFloat) const {
10678 MachineFunction &MF = *MBB->getParent();
10679 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10680 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10681
10682 // Update opcode.
10683 MI.setDesc(TII->get(Opcode));
10684
10685 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10686 // Make sure to add the corresponding GRSM bits if they are missing.
10687 uint64_t Control = MI.getOperand(2).getImm();
10688 static const unsigned GPRControlBit[16] = {
10689 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10690 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10691 };
10692 Control |= GPRControlBit[15];
10693 if (TFI->hasFP(MF))
10694 Control |= GPRControlBit[11];
10695 MI.getOperand(2).setImm(Control);
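// Each GPRControlBit entry covers an even/odd register pair (0x8000 for r0/r1 down
// to 0x0100 for r14/r15); pairs whose bit is set are saved and restored by the
// transaction, so only the remaining pairs are added as clobbers below.
// GPRControlBit[15] (r14/r15, return address and stack pointer) is always forced
// on, and GPRControlBit[11] (r10/r11) is added when a frame pointer is in use.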
10696
10697 // Add GPR clobbers.
10698 for (int I = 0; I < 16; I++) {
10699 if ((Control & GPRControlBit[I]) == 0) {
10700 unsigned Reg = SystemZMC::GR64Regs[I];
10701 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10702 }
10703 }
10704
10705 // Add FPR/VR clobbers.
10706 if (!NoFloat && (Control & 4) != 0) {
10707 if (Subtarget.hasVector()) {
10708 for (unsigned Reg : SystemZMC::VR128Regs) {
10709 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10710 }
10711 } else {
10712 for (unsigned Reg : SystemZMC::FP64Regs) {
10713 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10714 }
10715 }
10716 }
10717
10718 return MBB;
10719}
10720
10721MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10722 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10723 MachineFunction &MF = *MBB->getParent();
10724 MachineRegisterInfo *MRI = &MF.getRegInfo();
10725 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10726 DebugLoc DL = MI.getDebugLoc();
10727
10728 Register SrcReg = MI.getOperand(0).getReg();
10729
10730 // Create new virtual register of the same class as source.
10731 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10732 Register DstReg = MRI->createVirtualRegister(RC);
10733
10734 // Replace pseudo with a normal load-and-test that models the def as
10735 // well.
10736 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10737 .addReg(SrcReg)
10738 .setMIFlags(MI.getFlags());
10739 MI.eraseFromParent();
10740
10741 return MBB;
10742}
10743
10744MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10745 MachineInstr &MI, MachineBasicBlock *MBB) const {
10746 MachineFunction &MF = *MBB->getParent();
10747 MachineRegisterInfo *MRI = &MF.getRegInfo();
10748 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10749 DebugLoc DL = MI.getDebugLoc();
10750 const unsigned ProbeSize = getStackProbeSize(MF);
10751 Register DstReg = MI.getOperand(0).getReg();
10752 Register SizeReg = MI.getOperand(2).getReg();
10753
10754 MachineBasicBlock *StartMBB = MBB;
10755 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10756 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
10757 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
10758 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
10759 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
10760
10761 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(
10762 MachinePointerInfo(), MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
10763
10764 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10765 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10766
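// Sketch of the control flow, assuming the default probe size of 4096 bytes: a
// 10000-byte PROBED_ALLOCA runs LoopBodyMBB twice (two 4096-byte stack extensions,
// each probed with a volatile compare), and TailMBB then extends the stack by the
// remaining 1808 bytes.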
10767 // LoopTestMBB
10768 // BRC TailTestMBB
10769 // # fallthrough to LoopBodyMBB
10770 StartMBB->addSuccessor(LoopTestMBB);
10771 MBB = LoopTestMBB;
10772 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
10773 .addReg(SizeReg)
10774 .addMBB(StartMBB)
10775 .addReg(IncReg)
10776 .addMBB(LoopBodyMBB);
10777 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
10778 .addReg(PHIReg)
10779 .addImm(ProbeSize);
10780 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10781 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
10782 .addMBB(TailTestMBB);
10783 MBB->addSuccessor(LoopBodyMBB);
10784 MBB->addSuccessor(TailTestMBB);
10785
10786 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
10787 // J LoopTestMBB
10788 MBB = LoopBodyMBB;
10789 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
10790 .addReg(PHIReg)
10791 .addImm(ProbeSize);
10792 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
10793 .addReg(SystemZ::R15D)
10794 .addImm(ProbeSize);
10795 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10796 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
10797 .setMemRefs(VolLdMMO);
10798 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
10799 MBB->addSuccessor(LoopTestMBB);
10800
10801 // TailTestMBB
10802 // BRC DoneMBB
10803 // # fallthrough to TailMBB
10804 MBB = TailTestMBB;
10805 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10806 .addReg(PHIReg)
10807 .addImm(0);
10808 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10809 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10810 .addMBB(DoneMBB);
10811 MBB->addSuccessor(TailMBB);
10812 MBB->addSuccessor(DoneMBB);
10813
10814 // TailMBB
10815 // # fallthrough to DoneMBB
10816 MBB = TailMBB;
10817 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
10818 .addReg(SystemZ::R15D)
10819 .addReg(PHIReg);
10820 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10821 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
10822 .setMemRefs(VolLdMMO);
10823 MBB->addSuccessor(DoneMBB);
10824
10825 // DoneMBB
10826 MBB = DoneMBB;
10827 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
10828 .addReg(SystemZ::R15D);
10829
10830 MI.eraseFromParent();
10831 return DoneMBB;
10832}
10833
10834SDValue SystemZTargetLowering::
10835getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
10836 MachineFunction &MF = DAG.getMachineFunction();
10837 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
10838 SDLoc DL(SP);
10839 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
10840 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
10841}
10842
10843MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
10844 MachineInstr &MI, MachineBasicBlock *MBB) const {
10845 switch (MI.getOpcode()) {
10846 case SystemZ::ADJCALLSTACKDOWN:
10847 case SystemZ::ADJCALLSTACKUP:
10848 return emitAdjCallStack(MI, MBB);
10849
10850 case SystemZ::Select32:
10851 case SystemZ::Select64:
10852 case SystemZ::Select128:
10853 case SystemZ::SelectF32:
10854 case SystemZ::SelectF64:
10855 case SystemZ::SelectF128:
10856 case SystemZ::SelectVR32:
10857 case SystemZ::SelectVR64:
10858 case SystemZ::SelectVR128:
10859 return emitSelect(MI, MBB);
10860
10861 case SystemZ::CondStore8Mux:
10862 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
10863 case SystemZ::CondStore8MuxInv:
10864 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
10865 case SystemZ::CondStore16Mux:
10866 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
10867 case SystemZ::CondStore16MuxInv:
10868 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
10869 case SystemZ::CondStore32Mux:
10870 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
10871 case SystemZ::CondStore32MuxInv:
10872 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
10873 case SystemZ::CondStore8:
10874 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
10875 case SystemZ::CondStore8Inv:
10876 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
10877 case SystemZ::CondStore16:
10878 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
10879 case SystemZ::CondStore16Inv:
10880 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
10881 case SystemZ::CondStore32:
10882 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
10883 case SystemZ::CondStore32Inv:
10884 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
10885 case SystemZ::CondStore64:
10886 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
10887 case SystemZ::CondStore64Inv:
10888 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
10889 case SystemZ::CondStoreF32:
10890 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
10891 case SystemZ::CondStoreF32Inv:
10892 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
10893 case SystemZ::CondStoreF64:
10894 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
10895 case SystemZ::CondStoreF64Inv:
10896 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
10897
10898 case SystemZ::SCmp128Hi:
10899 return emitICmp128Hi(MI, MBB, false);
10900 case SystemZ::UCmp128Hi:
10901 return emitICmp128Hi(MI, MBB, true);
10902
10903 case SystemZ::PAIR128:
10904 return emitPair128(MI, MBB);
10905 case SystemZ::AEXT128:
10906 return emitExt128(MI, MBB, false);
10907 case SystemZ::ZEXT128:
10908 return emitExt128(MI, MBB, true);
10909
10910 case SystemZ::ATOMIC_SWAPW:
10911 return emitAtomicLoadBinary(MI, MBB, 0);
10912
10913 case SystemZ::ATOMIC_LOADW_AR:
10914 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
10915 case SystemZ::ATOMIC_LOADW_AFI:
10916 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
10917
10918 case SystemZ::ATOMIC_LOADW_SR:
10919 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
10920
10921 case SystemZ::ATOMIC_LOADW_NR:
10922 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
10923 case SystemZ::ATOMIC_LOADW_NILH:
10924 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
10925
10926 case SystemZ::ATOMIC_LOADW_OR:
10927 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
10928 case SystemZ::ATOMIC_LOADW_OILH:
10929 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
10930
10931 case SystemZ::ATOMIC_LOADW_XR:
10932 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
10933 case SystemZ::ATOMIC_LOADW_XILF:
10934 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
10935
10936 case SystemZ::ATOMIC_LOADW_NRi:
10937 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
10938 case SystemZ::ATOMIC_LOADW_NILHi:
10939 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
10940
10941 case SystemZ::ATOMIC_LOADW_MIN:
10942 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
10943 case SystemZ::ATOMIC_LOADW_MAX:
10944 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
10945 case SystemZ::ATOMIC_LOADW_UMIN:
10946 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
10947 case SystemZ::ATOMIC_LOADW_UMAX:
10948 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
10949
10950 case SystemZ::ATOMIC_CMP_SWAPW:
10951 return emitAtomicCmpSwapW(MI, MBB);
10952 case SystemZ::MVCImm:
10953 case SystemZ::MVCReg:
10954 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
10955 case SystemZ::NCImm:
10956 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
10957 case SystemZ::OCImm:
10958 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
10959 case SystemZ::XCImm:
10960 case SystemZ::XCReg:
10961 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
10962 case SystemZ::CLCImm:
10963 case SystemZ::CLCReg:
10964 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
10965 case SystemZ::MemsetImmImm:
10966 case SystemZ::MemsetImmReg:
10967 case SystemZ::MemsetRegImm:
10968 case SystemZ::MemsetRegReg:
10969 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
10970 case SystemZ::CLSTLoop:
10971 return emitStringWrapper(MI, MBB, SystemZ::CLST);
10972 case SystemZ::MVSTLoop:
10973 return emitStringWrapper(MI, MBB, SystemZ::MVST);
10974 case SystemZ::SRSTLoop:
10975 return emitStringWrapper(MI, MBB, SystemZ::SRST);
10976 case SystemZ::TBEGIN:
10977 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
10978 case SystemZ::TBEGIN_nofloat:
10979 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
10980 case SystemZ::TBEGINC:
10981 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
10982 case SystemZ::LTEBRCompare_Pseudo:
10983 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
10984 case SystemZ::LTDBRCompare_Pseudo:
10985 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
10986 case SystemZ::LTXBRCompare_Pseudo:
10987 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
10988
10989 case SystemZ::PROBED_ALLOCA:
10990 return emitProbedAlloca(MI, MBB);
10991 case SystemZ::EH_SjLj_SetJmp:
10992 return emitEHSjLjSetJmp(MI, MBB);
10993 case SystemZ::EH_SjLj_LongJmp:
10994 return emitEHSjLjLongJmp(MI, MBB);
10995
10996 case TargetOpcode::STACKMAP:
10997 case TargetOpcode::PATCHPOINT:
10998 return emitPatchPoint(MI, MBB);
10999
11000 default:
11001 llvm_unreachable("Unexpected instr type to insert");
11002 }
11003}
11004
11005// This is only used by the isel schedulers, and is needed only to prevent
11006 // the compiler from crashing when list-ilp is used.
11007const TargetRegisterClass *
11008SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11009 if (VT == MVT::Untyped)
11010 return &SystemZ::ADDR128BitRegClass;
11011 return TargetLowering::getRepRegClassFor(VT);
11012}
11013
11014SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11015 SelectionDAG &DAG) const {
11016 SDLoc dl(Op);
11017 /*
11018 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11019 settings:
11020 00 Round to nearest
11021 01 Round to 0
11022 10 Round to +inf
11023 11 Round to -inf
11024
11025 FLT_ROUNDS, on the other hand, expects the following:
11026 -1 Undefined
11027 0 Round to 0
11028 1 Round to nearest
11029 2 Round to +inf
11030 3 Round to -inf
11031 */
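// The bit trick below implements exactly this mapping:
//   FPC 0b00 -> (0 ^ 0) ^ 1 = 1 (nearest)    FPC 0b01 -> (1 ^ 0) ^ 1 = 0 (to zero)
//   FPC 0b10 -> (2 ^ 1) ^ 1 = 2 (+inf)       FPC 0b11 -> (3 ^ 1) ^ 1 = 3 (-inf)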
11032
11033 // Save FPC to register.
11034 SDValue Chain = Op.getOperand(0);
11035 SDValue EFPC(
11036 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11037 Chain = EFPC.getValue(1);
11038
11039 // Transform as necessary
11040 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11041 DAG.getConstant(3, dl, MVT::i32));
11042 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11043 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11044 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11045 DAG.getConstant(1, dl, MVT::i32)));
11046
11047 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11048 DAG.getConstant(1, dl, MVT::i32));
11049 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11050
11051 return DAG.getMergeValues({RetVal, Chain}, dl);
11052}
11053
11054SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11055 SelectionDAG &DAG) const {
11056 EVT VT = Op.getValueType();
11057 Op = Op.getOperand(0);
11058 EVT OpVT = Op.getValueType();
11059
11060 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11061
11062 SDLoc DL(Op);
11063
11064 // load a 0 vector for the third operand of VSUM.
11065 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11066
11067 // execute VSUM.
11068 switch (OpVT.getScalarSizeInBits()) {
11069 case 8:
11070 case 16:
11071 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11072 [[fallthrough]];
11073 case 32:
11074 case 64:
11075 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11076 DAG.getBitcast(Op.getValueType(), Zero));
11077 break;
11078 case 128:
11079 break; // VSUM over v1i128 should not happen and would be a noop
11080 default:
11081 llvm_unreachable("Unexpected scalar size.");
11082 }
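// E.g. a v16i8 reduction is performed as a VSUM into v4i32 lane sums followed by a
// VSUM of those lanes into a single i128; the scalar result is then read back as
// the last element after bitcasting to the original vector type.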
11083 // Cast to original vector type, retrieve last element.
11084 return DAG.getNode(
11085 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11086 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11087}
11088
11090 FunctionType *FT = F->getFunctionType();
11091 const AttributeList &Attrs = F->getAttributes();
11092 if (Attrs.hasRetAttrs())
11093 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11094 OS << *F->getReturnType() << " @" << F->getName() << "(";
11095 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11096 if (I)
11097 OS << ", ";
11098 OS << *FT->getParamType(I);
11099 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11100 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11101 if (ArgAttrs.hasAttribute(A))
11102 OS << " " << Attribute::getNameFromAttrKind(A);
11103 }
11104 OS << ")\n";
11105}
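// (The dump produced above looks roughly like "signext i32 @callee(i32 zeroext, i64)",
// with attribute names as printed by Attribute::getNameFromAttrKind; this is what the
// ABI checkers below emit when they report a missing extension attribute.)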
11106
11107bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11108 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11109 if (Itr == IsInternalCache.end())
11110 Itr = IsInternalCache
11111 .insert(std::pair<const Function *, bool>(
11112 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11113 .first;
11114 return Itr->second;
11115}
11116
11117void SystemZTargetLowering::
11118verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11119 const Function *F, SDValue Callee) const {
11120 // Temporarily only do the check when explicitly requested, until it can be
11121 // enabled by default.
11122 if (!EnableIntArgExtCheck)
11123 return;
11124
11125 bool IsInternal = false;
11126 const Function *CalleeFn = nullptr;
11127 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11128 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11129 IsInternal = isInternal(CalleeFn);
11130 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11131 errs() << "ERROR: Missing extension attribute of passed "
11132 << "value in call to function:\n" << "Callee: ";
11133 if (CalleeFn != nullptr)
11134 printFunctionArgExts(CalleeFn, errs());
11135 else
11136 errs() << "-\n";
11137 errs() << "Caller: ";
11138 printFunctionArgExts(F, errs());
11139 llvm_unreachable("");
11140 }
11141}
11142
11143void SystemZTargetLowering::
11144verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11145 const Function *F) const {
11146 // Temporarily only do the check when explicitly requested, until it can be
11147 // enabled by default.
11148 if (!EnableIntArgExtCheck)
11149 return;
11150
11151 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11152 errs() << "ERROR: Missing extension attribute of returned "
11153 << "value from function:\n";
11154 printFunctionArgExts(F, errs());
11155 llvm_unreachable("");
11156 }
11157}
11158
11159// Verify that narrow integer arguments are extended as required by the ABI.
11160// Return false if an error is found.
11161bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11162 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11163 if (!Subtarget.isTargetELF())
11164 return true;
11165
11166 if (EnableIntArgExtCheck.getNumOccurrences()) {
11167 if (!EnableIntArgExtCheck)
11168 return true;
11169 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11170 return true;
11171
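// Only i32 needs checking here: narrower integer arguments have already been
// promoted to i32 by this point (hence the assert below), and the check fails when
// such a value carries neither a signext/zeroext attribute nor an explicit noext
// opt-out, as the SystemZ ELF ABI requires.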
11172 for (unsigned i = 0; i < Outs.size(); ++i) {
11173 MVT VT = Outs[i].VT;
11174 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11175 if (VT.isInteger()) {
11176 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11177 "Unexpected integer argument VT.");
11178 if (VT == MVT::i32 &&
11179 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11180 return false;
11181 }
11182 }
11183
11184 return true;
11185}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
uint64_t High
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
raw_pwrite_stream & OS
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:322
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:397
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
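Several of the lowering helpers in this file drive their mask logic through the APInt methods listed above. A minimal sketch of those bit-level queries (getAllOnes, getBitsSet, setBit, lshrInPlace, trunc/zext, isSubsetOf); the particular widths and mask values are illustrative only, not taken from the backend:
#include "llvm/ADT/APInt.h"
using namespace llvm;
// Illustrative only: build a 64-bit mask, probe it, and narrow/widen it.
static bool maskSketch() {
  APInt Ones = APInt::getAllOnes(64);        // all 64 bits set
  APInt Mask = APInt::getBitsSet(64, 8, 16); // bits [8, 16) set
  Mask.setBit(0);                            // also set bit 0
  bool Subset = Mask.isSubsetOf(Ones);       // trivially true here
  Mask.lshrInPlace(4);                       // logical shift right in place
  APInt Narrow = Mask.trunc(32);             // keep only the low 32 bits
  APInt Wide = Narrow.zext(64);              // zero-extend back to 64 bits
  return Subset && Wide.getActiveBits() <= 64;
}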
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
@ Add
*p = old + v
Definition: Instructions.h:725
@ Or
*p = old | v
Definition: Instructions.h:733
@ Sub
*p = old - v
Definition: Instructions.h:727
@ And
*p = old & v
Definition: Instructions.h:729
@ Xor
*p = old ^ v
Definition: Instructions.h:735
BinOp getOperation() const
Definition: Instructions.h:819
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:400
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
Definition: Attributes.cpp:322
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
The address of a basic block.
Definition: Constants.h:899
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
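The CCState/CCValAssign entries above are the usual argument-assignment workflow inside a LowerFormalArguments-style hook. A hedged sketch of that flow, assuming the hook's CallConv, IsVarArg, Ins and DAG parameters and a tablegen-generated assignment function named CC_SystemZ:
// Sketch only: analyze incoming arguments and walk the assigned locations.
static void sketchAnalyzeArgs(CallingConv::ID CallConv, bool IsVarArg,
                              const SmallVectorImpl<ISD::InputArg> &Ins,
                              SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); // CC_SystemZ: assumed CCAssignFn
  for (CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      Register Reg = VA.getLocReg();      // argument arrives in a register
      (void)Reg;
    } else if (VA.isMemLoc()) {
      int64_t Off = VA.getLocMemOffset(); // argument arrives on the stack
      (void)Off;
    }
  }
}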
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator end()
Definition: DenseMap.h:87
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition: Function.cpp:951
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:774
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...

Definition: Function.h:270
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:623
bool hasLocalLinkage() const
Definition: GlobalValue.h:530
bool hasPrivateLinkage() const
Definition: GlobalValue.h:529
bool hasInternalLinkage() const
Definition: GlobalValue.h:528
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
An instruction for reading from memory.
Definition: Instructions.h:180
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
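The MachineInstrBuilder methods listed here are normally chained off BuildMI when a custom inserter emits machine instructions. A minimal sketch; TII, the insertion point and the registers are assumed to come from surrounding EmitInstrWithCustomInserter-style code, and the opcodes are illustrative:
// Sketch only: emit a 64-bit register copy followed by a load-immediate.
static void emitCopyAndZero(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            const SystemZInstrInfo *TII, Register DestReg,
                            Register SrcReg) {
  // DestReg = SrcReg (64-bit register copy).
  BuildMI(MBB, MI, DL, TII->get(SystemZ::LGR), DestReg).addReg(SrcReg);
  // DestReg = 0 via load halfword immediate.
  BuildMI(MBB, MI, DL, TII->get(SystemZ::LGHI), DestReg).addImm(0);
}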
Representation of each machine instruction.
Definition: MachineInstr.h:72
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:500
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:813
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:839
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:719
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:902
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:808
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:885
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:777
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
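Most of the SelectionDAG entries above appear in the lowering and combine routines as node construction paired with known-bits queries. A small hedged sketch of that pattern; the helper name and the 8-bit mask are arbitrary, and a scalar integer value type is assumed:
// Sketch only: mask the low byte of Val, or drop the AND if it is a no-op.
static SDValue maskLowByte(SelectionDAG &DAG, const SDLoc &DL, SDValue Val) {
  EVT VT = Val.getValueType();               // assumed scalar integer type
  unsigned BitWidth = VT.getScalarSizeInBits();
  APInt Mask = APInt::getLowBitsSet(BitWidth, 8);
  if (DAG.MaskedValueIsZero(Val, ~Mask))     // upper bits already zero?
    return Val;                              // the AND would be redundant
  return DAG.getNode(ISD::AND, DL, VT, Val,
                     DAG.getConstant(Mask, DL, VT));
}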
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void reserve(size_type N)
Definition: SmallVector.h:664
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:480
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:269
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:694
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
iterator end() const
Definition: StringRef.h:122
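The StringRef helpers above are typically used when parsing inline-asm constraints or register names (compare parseRegisterNumber earlier in this index). A tiny sketch of the pattern; the "{r<N>}"-style constraint shape is assumed for illustration:
// Sketch only: pull a register index out of a "{r<N>}"-style constraint.
static bool parseIndex(llvm::StringRef Constraint, unsigned &Index) {
  if (!Constraint.starts_with("{") || Constraint.size() < 3)
    return false;
  // Drop the opening brace and the leading letter, keep the digits.
  llvm::StringRef Digits = Constraint.slice(2, Constraint.size() - 1);
  return !Digits.getAsInteger(10, Index); // getAsInteger returns true on error
}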
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
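StringSwitch is the usual way a backend maps textual names to values, for example in getRegisterByName-style hooks. A minimal hedged sketch; the names and indices are illustrative:
#include "llvm/ADT/StringSwitch.h"
// Sketch only: map a register name to an index, -1 if unknown.
static int regIndexFromName(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      .Case("r0", 0)
      .Case("r1", 1)
      .Case("r15", 15)
      .Default(-1);
}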
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool useSoftFloat() const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
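The TargetLoweringBase setters above are what a target-lowering constructor uses to describe which operations, extending loads and truncating stores are legal. A hedged sketch of the general pattern, written as if inside such a constructor body; the opcodes, types and actions chosen here are purely illustrative, not SystemZ's actual configuration:
// Sketch only, inside a hypothetical target-lowering constructor body.
setOperationAction(ISD::CTPOP, MVT::i32, Expand);      // assume no native popcount
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);  // lower it ourselves
setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i1, Promote);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
computeRegisterProperties(Subtarget.getRegisterInfo());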
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
Value * getOperand(unsigned i) const
Definition: User.h:232
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
user_iterator user_begin()
Definition: Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
int getNumOccurrences() const
Definition: CommandLine.h:400
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
self_iterator getIterator()
Definition: ilist_node.h:134
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:461
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:256
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1236
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1232
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:504
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:270
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1379
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1265
@ ConstantFP
Definition: ISDOpcodes.h:87
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1381
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1351
@ STRICT_FCEIL
Definition: ISDOpcodes.h:454
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1382
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ GlobalAddress
Definition: ISDOpcodes.h:88
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1364
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:464
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:862
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:738
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1338
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1343
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:275
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:505
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1377
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:249
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1378
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1568
@ GlobalTLSAddress
Definition: ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:773
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1331
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1090
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:1002
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1187
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:347
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1380
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1166
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:535
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1347
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:663
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1261
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:343
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:458
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1485
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition: ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1375
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:463
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:452
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:453
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1383
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1151
@ ConstantPool
Definition: ISDOpcodes.h:92
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:718
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:960
@ STRICT_FROUND
Definition: ISDOpcodes.h:456
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:477
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:455
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:457
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:110
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1373
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:470
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1081
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1374
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:908
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1292
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1318
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1372
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:941
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:903
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:979
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1180
@ BlockAddress
Definition: ISDOpcodes.h:94
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:521
@ AssertZext
Definition: ISDOpcodes.h:63
@ STRICT_FRINT
Definition: ISDOpcodes.h:450
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1439
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:543
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Return true if N is a BUILD_VECTOR or SPLAT_VECTOR whose elements are all the same constant or undef, storing that constant in SplatValue.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1685
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1665
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
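A hedged fragment (N is assumed to be an SDNode* in scope; not code from this file) showing the usual way these predicates guard a combine:
// Only touch plain memory accesses: unindexed, non-extending, non-truncating.
if (ISD::isNormalLoad(N) || ISD::isNormalStore(N)) {
  // ... the transformation is safe for this memory access ...
}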
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:149
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
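A hedged fragment illustrating the libcall lookups (types chosen for illustration; assumes the RTLIB declarations are in scope):
// Ask for the runtime routine that converts i64 to f128 with signed semantics.
RTLIB::Libcall LC = RTLIB::getSINTTOFP(MVT::i64, MVT::f128);
if (LC == RTLIB::UNKNOWN_LIBCALL)
  report_fatal_error("Unsupported conversion");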
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
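Illustrative values (not from this file) for the four halfword predicates; each accepts a value only when its nonzero bits all lie in the named 16-bit quarter of the 64-bit word:
assert(SystemZ::isImmLL(0x000000000000ffffULL)); // bits 0..15
assert(SystemZ::isImmLH(0x00000000ffff0000ULL)); // bits 16..31
assert(SystemZ::isImmHL(0x0000ffff00000000ULL)); // bits 32..47
assert(SystemZ::isImmHH(0xffff000000000000ULL)); // bits 48..63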
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
const unsigned CCMASK_VCMP_NONE
Definition: SystemZ.h:103
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:213
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
support::ulittle32_t Word
Definition: IRSymtab.h:53
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:349
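For example (a standalone sketch, not from this file):
#include "llvm/Support/MathExtras.h"
// Hypothetical helper: number of bits needed to index NumElems distinct elements.
static unsigned bitsToIndex(unsigned NumElems) {
  return llvm::Log2_32_Ceil(NumElems); // bitsToIndex(5) == 3, bitsToIndex(0) == 32
}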
@ Offset
Definition: DWP.cpp:477
@ Length
Definition: DWP.cpp:477
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
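A hedged sketch of the builder interface as typically used from a custom inserter; the opcode and the surrounding MBB, MI, DL, TII, DestReg and SrcReg are illustrative assumptions, not code from this file:
// Insert a 64-bit register-to-register copy (LGR) before MI.
BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGR), DestReg)
    .addReg(SrcReg);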
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
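A hypothetical guard (Op assumed to be an SDValue in scope) combining the two predicates above:
// Look through bitcasts, then test for the constant zero.
SDValue Src = peekThroughBitcasts(Op);
if (isNullConstant(Src)) {
  // ... the zero operand can be folded away ...
}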
@ Done
Definition: Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:252
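For example (values chosen for illustration):
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isUIntN(8, 255), "255 fits in 8 unsigned bits");
static_assert(!llvm::isUIntN(8, 256), "256 does not fit in 8 unsigned bits");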
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:295
int countr_zero(T Val)
Count the number of 0s from the least significant bit towards the most significant, stopping at the first 1.
Definition: bit.h:157
int countl_zero(T Val)
Count the number of 0s from the most significant bit towards the least significant, stopping at the first 1.
Definition: bit.h:203
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
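A standalone sketch (values are illustrative) of the bit-manipulation helpers referenced above:
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
static void bitHelperSketch() {
  int TZ = llvm::countr_zero(0x08u);  // 3: trailing zero bits of 0b1000
  int LZ = llvm::countl_zero(0x08u);  // 28: leading zero bits in a 32-bit value
  unsigned Up = llvm::bit_ceil(5u);   // 8: smallest power of two >= 5
  bool P2 = llvm::isPowerOf2_32(64);  // true
  (void)TZ; (void)LZ; (void)Up; (void)P2;
}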
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:577
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:280
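Illustrative values only (a fragment, assuming llvm/Support/MathExtras.h and llvm/ADT/bit.h are included):
int64_t Neg = llvm::SignExtend64<16>(0xFFFF); // -1: bit 15 acts as the sign bit
unsigned Low = llvm::bit_floor(5u);           // 4: largest power of two <= 5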
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
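A small hypothetical example of the EVT queries listed above (Ctx is assumed to be an LLVMContext in scope):
EVT VecVT = EVT::getVectorVT(Ctx, MVT::i32, 4); // v4i32
unsigned NumElts = VecVT.getVectorNumElements(); // 4
EVT EltVT = VecVT.getVectorElementType();        // i32
bool FP = VecVT.isFloatingPoint();               // false
uint64_t Bits = VecVT.getFixedSizeInBits();      // 128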
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:179
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:173
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:138
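A standalone sketch (values chosen for illustration) of how KnownBits carries partial knowledge through an extension:
#include "llvm/Support/KnownBits.h"
static llvm::APInt knownBitsSketch() {
  llvm::KnownBits Known(8);
  Known.Zero.setHighBits(4);             // the top four bits are known zero
  llvm::KnownBits Wide = Known.zext(16); // the new high bits are also known zero
  return Wide.getMaxValue();             // 0x000f
}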
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
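A hedged fragment (DAG, Chain, DL, Addr, FI and VT are assumed to be in scope) showing a typical use of the fixed-stack variant:
// Describe the memory operand of a reload from frame index FI.
SDValue Reload = DAG.getLoad(
    VT, DL, Chain, Addr,
    MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));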
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
const uint32_t * getNoPreservedMask() const override
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
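A hedged sketch of the usual builder chain, as it would appear inside a TargetLowering member; DAG, DL, Chain, Callee, Args and RetTy are assumptions, not code from this file:
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
    .setChain(Chain)
    .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
    .setSExtResult(true)
    .setDiscardResult(false);
std::pair<SDValue, SDValue> Result = LowerCallTo(CLI);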
This structure is used to pass arguments to makeLibCall function.