PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
144 cl::desc("max depth when checking alias info in GatherAllAliases()"));
145
147 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
148 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
149 "function to use initial-exec"));
150
151STATISTIC(NumTailCalls, "Number of tail calls");
152STATISTIC(NumSiblingCalls, "Number of sibling calls");
153STATISTIC(ShufflesHandledWithVPERM,
154 "Number of shuffles lowered to a VPERM or XXPERM");
155STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
156
157static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
158
159static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
160
161static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
162
163// A faster local-[exec|dynamic] TLS access sequence (enabled with the
164// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
165// variables; consistent with the IBM XL compiler, we apply a max size of
166// slightly under 32KB.
168
169// FIXME: Remove this once the bug has been fixed!
171
173 const PPCSubtarget &STI)
174 : TargetLowering(TM), Subtarget(STI) {
175 // Initialize map that relates the PPC addressing modes to the computed flags
176 // of a load/store instruction. The map is used to determine the optimal
177 // addressing mode when selecting loads and stores.
178 initializeAddrModeMap();
179 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
180 // arguments are at least 4/8 bytes aligned.
181 bool isPPC64 = Subtarget.isPPC64();
182 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
183 const MVT RegVT = Subtarget.getScalarIntVT();
184
185 // Set up the register classes.
186 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
187 if (!useSoftFloat()) {
188 if (hasSPE()) {
189 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
190 // EFPU2 APU only supports f32
191 if (!Subtarget.hasEFPU2())
192 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
193 } else {
194 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
195 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
196 }
197 }
198
200
201 // On P10, the default lowering generates better code using the
202 // setbc instruction.
203 if (!Subtarget.hasP10Vector()) {
205 if (isPPC64)
207 }
208
209 // Match BITREVERSE to a customized fast code sequence in the td file.
212
213 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
215
216 // Custom lower inline assembly to check for special registers.
219
220 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
221 for (MVT VT : MVT::integer_valuetypes()) {
224 }
225
226 if (Subtarget.isISA3_0()) {
227 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
228 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
229 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
230 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
231 } else {
232 // No extending loads from f16 or HW conversions back and forth.
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
236 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
239 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
240 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
241 }
242
243 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
244
245 // PowerPC has pre-inc loads and stores.
256 if (!Subtarget.hasSPE()) {
261 }
262
263 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
264 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
265 for (MVT VT : ScalarIntVTs) {
270 }
271
272 if (Subtarget.useCRBits()) {
274
275 if (isPPC64 || Subtarget.hasFPCVT()) {
280
282 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
284 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
285
290
292 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
294 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
295 } else {
300 }
301
302 // PowerPC does not support direct load/store of condition registers.
305
306 // FIXME: Remove this once the ANDI glue bug is fixed:
307 if (ANDIGlueBug)
309
310 for (MVT VT : MVT::integer_valuetypes()) {
313 setTruncStoreAction(VT, MVT::i1, Expand);
314 }
315
316 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
317 }
318
319 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
320 // PPC (the libcall is not available).
325
326 // We do not currently implement these libm ops for PowerPC.
327 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
328 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
329 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
330 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
332 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
333
334 // PowerPC has no SREM/UREM instructions unless we are on P9.
335 // On P9 we may use a hardware instruction to compute the remainder.
336 // When the result of both the remainder and the division is required it is
337 // more efficient to compute the remainder from the result of the division
338 // rather than use the remainder instruction. The instructions are legalized
339 // directly because the DivRemPairsPass performs the transformation at the IR
340 // level.
341 if (Subtarget.isISA3_0()) {
346 } else {
351 }
352
353 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
362
363 // Handle constrained floating-point operations for scalars.
364 // TODO: Handle SPE-specific operations.
370
375
376 if (!Subtarget.hasSPE()) {
379 }
380
381 if (Subtarget.hasVSX()) {
384 }
385
386 if (Subtarget.hasFSQRT()) {
389 }
390
391 if (Subtarget.hasFPRND()) {
396
401 }
402
403 // We don't support sin/cos/sqrt/fmod/pow
414
415 // MASS transformation for LLVM intrinsics with replicating fast-math flag
416 // to be consistent with the PPCGenScalarMASSEntries pass.
417 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
430 }
431
432 if (Subtarget.hasSPE()) {
435 } else {
436 setOperationAction(ISD::FMA , MVT::f64, Legal);
437 setOperationAction(ISD::FMA , MVT::f32, Legal);
440 }
441
442 if (Subtarget.hasSPE())
443 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
444
445 // If we're enabling GP optimizations, use hardware square root
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
448 Subtarget.hasFRE()))
450
451 if (!Subtarget.hasFSQRT() &&
452 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
453 Subtarget.hasFRES()))
455
456 if (Subtarget.hasFCPSGN()) {
459 } else {
462 }
463
464 if (Subtarget.hasFPRND()) {
469
474 }
475
476 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
477 // instruction xxbrd to speed up scalar BSWAP64.
478 if (Subtarget.isISA3_1()) {
481 } else {
484 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
485 }
486
487 // CTPOP or CTTZ were introduced in P8/P9 respectively
488 if (Subtarget.isISA3_0()) {
489 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
490 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
491 } else {
492 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
493 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
494 }
495
496 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
499 } else {
502 }
503
504 // PowerPC does not have ROTR
507
508 if (!Subtarget.useCRBits()) {
509 // PowerPC does not have Select
514 }
515
516 // PowerPC wants to turn select_cc of FP into fsel when possible.
519
520 // PowerPC wants to optimize integer setcc a bit
521 if (!Subtarget.useCRBits())
523
524 if (Subtarget.hasFPU()) {
528
532 }
533
534 // PowerPC does not have BRCOND which requires SetCC
535 if (!Subtarget.useCRBits())
537
539
540 if (Subtarget.hasSPE()) {
541 // SPE has built-in conversions
548
549 // SPE supports signaling compare of f32/f64.
552 } else {
553 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
556
557 // PowerPC does not have [U|S]INT_TO_FP
562 }
563
564 if (Subtarget.hasDirectMove() && isPPC64) {
569 if (TM.Options.UnsafeFPMath) {
578 }
579 } else {
584 }
585
586 // We cannot sextinreg(i1). Expand to shifts.
588
589 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
590 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
591 // support continuation, user-level threading, etc. As a result, no
592 // other SjLj exception interfaces are implemented; please don't build
593 // your own exception handling based on them.
594 // LLVM/Clang supports zero-cost DWARF exception handling.
597
598 // We want to legalize GlobalAddress and ConstantPool nodes into the
599 // appropriate instructions to materialize the address.
610
611 // TRAP is legal.
612 setOperationAction(ISD::TRAP, MVT::Other, Legal);
613
614 // TRAMPOLINE is custom lowered.
617
618 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
620
621 if (Subtarget.is64BitELFABI()) {
622 // VAARG always uses double-word chunks, so promote anything smaller.
624 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
626 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
628 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
630 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
632 } else if (Subtarget.is32BitELFABI()) {
633 // VAARG is custom lowered with the 32-bit SVR4 ABI.
636 } else
638
639 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
640 if (Subtarget.is32BitELFABI())
642 else
644
645 // Use the default implementation.
646 setOperationAction(ISD::VAEND , MVT::Other, Expand);
655
656 // We want to custom lower some of our intrinsics.
662
663 // To handle counter-based loop conditions.
665
670
671 // Comparisons that require checking two conditions.
672 if (Subtarget.hasSPE()) {
677 }
690
693
694 if (Subtarget.has64BitSupport()) {
695 // They also have instructions for converting between i64 and fp.
704 // This is just the low 32 bits of a (signed) fp->i64 conversion.
705 // We cannot do this with Promote because i64 is not a legal type.
708
709 if (Subtarget.hasLFIWAX() || isPPC64) {
712 }
713 } else {
714 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
715 if (Subtarget.hasSPE()) {
718 } else {
721 }
722 }
723
724 // With the instructions enabled under FPCVT, we can do everything.
725 if (Subtarget.hasFPCVT()) {
726 if (Subtarget.has64BitSupport()) {
735 }
736
745 }
746
747 if (Subtarget.use64BitRegs()) {
748 // 64-bit PowerPC implementations can support i64 types directly
749 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
750 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
752 // 64-bit PowerPC wants to expand i128 shifts itself.
756 } else {
757 // 32-bit PowerPC wants to expand i64 shifts itself.
761 }
762
763 // PowerPC has better expansions for funnel shifts than the generic
764 // TargetLowering::expandFunnelShift.
765 if (Subtarget.has64BitSupport()) {
768 }
771
772 if (Subtarget.hasVSX()) {
777 }
778
779 if (Subtarget.hasAltivec()) {
780 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
785 }
786 // First set operation action for all vector types to expand. Then we
787 // will selectively turn on ones that can be effectively codegen'd.
789 // add/sub are legal for all supported vector VT's.
792
793 // For v2i64, these are only valid with P8Vector. This is corrected after
794 // the loop.
795 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
800 }
801 else {
806 }
807
808 if (Subtarget.hasVSX()) {
811 }
812
813 // Vector instructions introduced in P8
814 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
817 }
818 else {
821 }
822
823 // Vector instructions introduced in P9
824 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
826 else
828
829 // We promote all shuffles to v16i8.
831 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
832
833 // We promote all non-typed operations to v4i32.
835 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
837 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
839 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
841 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
843 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
846 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
848 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
849
850 // No other operations are legal.
889
890 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
891 setTruncStoreAction(VT, InnerVT, Expand);
894 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
895 }
896 }
898 if (!Subtarget.hasP8Vector()) {
899 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
900 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
901 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
902 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
903 }
904
905 // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
906 // with merges, splats, etc.
908
909 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
910 // are cheap, so handle them before they get expanded to scalar.
916
917 setOperationAction(ISD::AND , MVT::v4i32, Legal);
918 setOperationAction(ISD::OR , MVT::v4i32, Legal);
919 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
920 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
922 Subtarget.useCRBits() ? Legal : Expand);
923 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
933 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
936
937 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
938 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
939 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
940 if (Subtarget.hasAltivec())
941 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
943 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
944 if (Subtarget.hasP8Altivec())
945 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
946
947 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
948 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
949 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
950 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
951
952 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
953 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
954
955 if (Subtarget.hasVSX()) {
956 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
957 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
959 }
960
961 if (Subtarget.hasP8Altivec())
962 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
963 else
964 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
965
966 if (Subtarget.isISA3_1()) {
967 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
968 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
969 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
970 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
971 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
972 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
973 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
974 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
975 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
976 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
977 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
978 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
979 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
980 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
981 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
982 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
983 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
984 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
985 }
986
987 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
988 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
989
992 // LE is P8+/64-bit so direct moves are supported and these operations
993 // are legal. The custom transformation requires 64-bit since we need a
994 // pair of stores that will cover a 128-bit load for P10.
995 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
999 }
1000
1005
1006 // Altivec does not contain unordered floating-point compare instructions
1007 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1008 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1009 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1010 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1011
1012 if (Subtarget.hasVSX()) {
1015 if (Subtarget.hasP8Vector()) {
1018 }
1019 if (Subtarget.hasDirectMove() && isPPC64) {
1028 }
1030
1031 // The nearbyint variants are not allowed to raise the inexact exception
1032 // so we can only code-gen them with unsafe math.
1033 if (TM.Options.UnsafeFPMath) {
1036 }
1037
1038 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1039 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1042 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1046
1048 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1049 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1052
1053 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1054 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1055
1056 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1057 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1058
1059 // Share the Altivec comparison restrictions.
1060 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1061 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1062 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1063 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1064
1065 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1066 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1067
1069
1070 if (Subtarget.hasP8Vector())
1071 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1072
1073 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1074
1075 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1076 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1077 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1078
1079 if (Subtarget.hasP8Altivec()) {
1080 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1081 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1082 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1083
1084 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1085 // SRL, but not for SRA because of the instructions available:
1086 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1087 // doing.
1088 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1089 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1090 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1091
1092 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1093 }
1094 else {
1095 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1096 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1097 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1098
1099 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1100
1101 // VSX v2i64 only supports non-arithmetic operations.
1102 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1103 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1104 }
1105
1106 if (Subtarget.isISA3_1())
1107 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1108 else
1109 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1110
1111 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1112 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1114 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1115
1117
1126
1127 // Custom handling for partial vectors of integers converted to
1128 // floating point. We already have optimal handling for v2i32 through
1129 // the DAG combine, so those aren't necessary.
1146
1147 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1148 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1149 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1150 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1153
1156
1157 // Handle constrained floating-point operations for vectors.
1158 // The predicate is `hasVSX` because Altivec instructions do not raise
1159 // exceptions but VSX vector instructions do.
1173
1187
1188 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1189 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1190
1191 for (MVT FPT : MVT::fp_valuetypes())
1192 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1193
1194 // Expand the SELECT to SELECT_CC
1196
1197 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1198 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1199
1200 // No implementation for these ops for PowerPC.
1202 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1203 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1204 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1206 setOperationAction(ISD::FREM, MVT::f128, Expand);
1207 }
1208
1209 if (Subtarget.hasP8Altivec()) {
1210 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1211 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1212 }
1213
1214 if (Subtarget.hasP9Vector()) {
1217
1218 // Test data class instructions store results in CR bits.
1219 if (Subtarget.useCRBits()) {
1224 }
1225
1226 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1227 // SRL, but not for SRA because of the instructions available:
1228 // VS{RL} and VS{RL}O.
1229 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1230 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1231 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1232
1233 setOperationAction(ISD::FADD, MVT::f128, Legal);
1234 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1235 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1236 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1238
1239 setOperationAction(ISD::FMA, MVT::f128, Legal);
1246
1248 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1250 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1253
1257
1258 // Handle constrained floating-point operations for fp128.
1275 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1276 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1277 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1278 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1279 } else if (Subtarget.hasVSX()) {
1282
1283 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1284 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1285
1286 // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1287 // fp_to_uint and int_to_fp.
1290
1291 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1292 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1293 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1294 setOperationAction(ISD::FABS, MVT::f128, Expand);
1296 setOperationAction(ISD::FMA, MVT::f128, Expand);
1298
1299 // Expand the fp_extend if the target type is fp128.
1302
1303 // Expand the fp_round if the source type is fp128.
1304 for (MVT VT : {MVT::f32, MVT::f64}) {
1307 }
1308
1313
1314 // Lower the following f128 select_cc pattern:
1315 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1317
1318 // We need to handle f128 SELECT_CC with integer result type.
1320 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1321 }
1322
1323 if (Subtarget.hasP9Altivec()) {
1324 if (Subtarget.isISA3_1()) {
1329 } else {
1332 }
1340
1341 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1342 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1343 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1344 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1345 }
1346
1347 if (Subtarget.hasP10Vector()) {
1349 }
1350 }
1351
1352 if (Subtarget.pairedVectorMemops()) {
1353 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1354 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1355 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1356 }
1357 if (Subtarget.hasMMA()) {
1358 if (Subtarget.isISAFuture())
1359 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1360 else
1361 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1362 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1363 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1365 }
1366
1367 if (Subtarget.has64BitSupport())
1369
1370 if (Subtarget.isISA3_1())
1371 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1372
1373 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1374
1375 if (!isPPC64) {
1378 }
1379
1384 }
1385
1387
1388 if (Subtarget.hasAltivec()) {
1389 // Altivec instructions set fields to all zeros or all ones.
1391 }
1392
1395 else if (isPPC64)
1397 else
1399
1400 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1401
1402 // We have target-specific dag combine patterns for the following nodes:
1405 if (Subtarget.hasFPCVT())
1408 if (Subtarget.useCRBits())
1412
1414
1416
1417 if (Subtarget.useCRBits()) {
1419 }
1420
1421 setLibcallName(RTLIB::LOG_F128, "logf128");
1422 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1423 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1424 setLibcallName(RTLIB::EXP_F128, "expf128");
1425 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1426 setLibcallName(RTLIB::SIN_F128, "sinf128");
1427 setLibcallName(RTLIB::COS_F128, "cosf128");
1428 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1429 setLibcallName(RTLIB::POW_F128, "powf128");
1430 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1431 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1432 setLibcallName(RTLIB::REM_F128, "fmodf128");
1433 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1434 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1435 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1436 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1437 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1438 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1439 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1440 setLibcallName(RTLIB::RINT_F128, "rintf128");
1441 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1442 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1443 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1444 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1445 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1446
1447 if (Subtarget.isAIXABI()) {
1448 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1449 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1450 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1451 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1452 }
1453
1454 // With 32 condition bits, we don't need to sink (and duplicate) compares
1455 // aggressively in CodeGenPrep.
1456 if (Subtarget.useCRBits()) {
1459 }
1460
1461 // TODO: The default entry number is set to 64. This stops most jump table
1462 // generation on PPC. But it is good for current PPC HWs because the indirect
1463 // branch via mtctr to the jump table may lead to poor branch prediction.
1464 // Re-evaluate this value on future HWs that can do better with mtctr.
1466
1468
1469 switch (Subtarget.getCPUDirective()) {
1470 default: break;
1471 case PPC::DIR_970:
1472 case PPC::DIR_A2:
1473 case PPC::DIR_E500:
1474 case PPC::DIR_E500mc:
1475 case PPC::DIR_E5500:
1476 case PPC::DIR_PWR4:
1477 case PPC::DIR_PWR5:
1478 case PPC::DIR_PWR5X:
1479 case PPC::DIR_PWR6:
1480 case PPC::DIR_PWR6X:
1481 case PPC::DIR_PWR7:
1482 case PPC::DIR_PWR8:
1483 case PPC::DIR_PWR9:
1484 case PPC::DIR_PWR10:
1485 case PPC::DIR_PWR11:
1489 break;
1490 }
1491
1492 if (Subtarget.enableMachineScheduler())
1494 else
1496
1498
1499 // The Freescale cores do better with aggressive inlining of memcpy and
1500 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1501 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1502 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1503 MaxStoresPerMemset = 32;
1505 MaxStoresPerMemcpy = 32;
1509 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1510 // The A2 also benefits from (very) aggressive inlining of memcpy and
1511 // friends. The overhead of the function call, even when warm, can be
1512 // over one hundred cycles.
1513 MaxStoresPerMemset = 128;
1514 MaxStoresPerMemcpy = 128;
1515 MaxStoresPerMemmove = 128;
1516 MaxLoadsPerMemcmp = 128;
1517 } else {
1520 }
1521
1522 IsStrictFPEnabled = true;
1523
1524 // Let the subtarget (CPU) decide if a predictable select is more expensive
1525 // than the corresponding branch. This information is used in CGP to decide
1526 // when to convert selects into branches.
1528
1530}
1531
1532// *********************************** NOTE ************************************
1533// For selecting load and store instructions, the addressing modes are defined
1534// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1535 // patterns to match the load and store instructions.
1536//
1537// The TD definitions for the addressing modes correspond to their respective
1538// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1539// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1540// address mode flags of a particular node. Afterwards, the computed address
1541// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1542// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1543// accordingly, based on the preferred addressing mode.
1544//
1545// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1546// MemOpFlags contains all the possible flags that can be used to compute the
1547// optimal addressing mode for load and store instructions.
1548// AddrMode contains all the possible load and store addressing modes available
1549// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1550//
1551// When adding new load and store instructions, it is possible that new address
1552// flags may need to be added into MemOpFlags, and a new addressing mode will
1553 // need to be added to AddrMode. An entry for the new addressing mode (consisting
1554 // of the minimal and main distinguishing address flags for the new load/store
1555 // instructions) will need to be added to initializeAddrModeMap() below.
1556 // Finally, when adding new addressing modes, getAddrModeForFlags() will
1557 // need to be updated to account for selecting the optimal addressing mode.
1558// *****************************************************************************
1559/// Initialize the map that relates the different addressing modes of the load
1560/// and store instructions to a set of flags. This ensures the load/store
1561/// instruction is correctly matched during instruction selection.
1562void PPCTargetLowering::initializeAddrModeMap() {
1563 AddrModesMap[PPC::AM_DForm] = {
1564 // LWZ, STW
1569 // LBZ, LHZ, STB, STH
1574 // LHA
1579 // LFS, LFD, STFS, STFD
1584 };
1585 AddrModesMap[PPC::AM_DSForm] = {
1586 // LWA
1590 // LD, STD
1594 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1598 };
1599 AddrModesMap[PPC::AM_DQForm] = {
1600 // LXV, STXV
1604 };
1605 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1607 // TODO: Add mapping for quadword load/store.
1608}
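// A minimal, hypothetical sketch (not part of the original file) of how an
// additional addressing mode would be registered, following the NOTE above.
// The mode PPC::AM_NewForm and the flag PPC::MOF_NewFlag are placeholder
// names; only PPC::MOF_RPlusSImm34 is taken from the entries shown here.
//
//   AddrModesMap[PPC::AM_NewForm] = {
//       // Minimal distinguishing flags for the new load/store variants.
//       PPC::MOF_RPlusSImm34 | PPC::MOF_NewFlag,
//   };
//
// getAddrModeForFlags() would then need a matching case so that computed
// flags containing PPC::MOF_NewFlag select PPC::AM_NewForm.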
1609
1610/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1611/// the desired ByVal argument alignment.
1612static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1613 if (MaxAlign == MaxMaxAlign)
1614 return;
1615 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1616 if (MaxMaxAlign >= 32 &&
1617 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1618 MaxAlign = Align(32);
1619 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1620 MaxAlign < 16)
1621 MaxAlign = Align(16);
1622 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1623 Align EltAlign;
1624 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1625 if (EltAlign > MaxAlign)
1626 MaxAlign = EltAlign;
1627 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1628 for (auto *EltTy : STy->elements()) {
1629 Align EltAlign;
1630 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1631 if (EltAlign > MaxAlign)
1632 MaxAlign = EltAlign;
1633 if (MaxAlign == MaxMaxAlign)
1634 break;
1635 }
1636 }
1637}
1638
1639/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1640/// function arguments in the caller parameter area.
1642 const DataLayout &DL) const {
1643 // 16-byte and wider vectors are passed on a 16-byte boundary.
1644 // The rest use an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1645 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1646 if (Subtarget.hasAltivec())
1647 getMaxByValAlign(Ty, Alignment, Align(16));
1648 return Alignment;
1649}
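// Worked example (illustrative): for a byval struct containing a 128-bit
// vector member such as a v4i32, getMaxByValAlign() raises MaxAlign to 16, so
// on an Altivec-enabled PPC64 target getByValTypeAlignment() returns Align(16)
// rather than the default Align(8). A struct of plain integer fields keeps the
// default 8-byte (PPC64) or 4-byte (PPC32) alignment.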
1650
1652 return Subtarget.useSoftFloat();
1653}
1654
1656 return Subtarget.hasSPE();
1657}
1658
1660 return VT.isScalarInteger();
1661}
1662
1664 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1665 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1666 return false;
1667
1668 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1669 if (VTy->getScalarType()->isIntegerTy()) {
1670 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1671 if (ElemSizeInBits == 32) {
1672 Index = Subtarget.isLittleEndian() ? 2 : 1;
1673 return true;
1674 }
1675 if (ElemSizeInBits == 64) {
1676 Index = Subtarget.isLittleEndian() ? 1 : 0;
1677 return true;
1678 }
1679 }
1680 }
1681 return false;
1682}
1683
1684const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1685 switch ((PPCISD::NodeType)Opcode) {
1686 case PPCISD::FIRST_NUMBER: break;
1687 case PPCISD::FSEL: return "PPCISD::FSEL";
1688 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1689 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1690 case PPCISD::FCFID: return "PPCISD::FCFID";
1691 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1692 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1693 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1694 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1695 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1696 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1697 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1698 case PPCISD::FRE: return "PPCISD::FRE";
1699 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1700 case PPCISD::FTSQRT:
1701 return "PPCISD::FTSQRT";
1702 case PPCISD::FSQRT:
1703 return "PPCISD::FSQRT";
1704 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1705 case PPCISD::VPERM: return "PPCISD::VPERM";
1706 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1708 return "PPCISD::XXSPLTI_SP_TO_DP";
1710 return "PPCISD::XXSPLTI32DX";
1711 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1712 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1713 case PPCISD::XXPERM:
1714 return "PPCISD::XXPERM";
1715 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1716 case PPCISD::CMPB: return "PPCISD::CMPB";
1717 case PPCISD::Hi: return "PPCISD::Hi";
1718 case PPCISD::Lo: return "PPCISD::Lo";
1719 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1720 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1721 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1722 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1723 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1724 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1725 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1726 case PPCISD::SRL: return "PPCISD::SRL";
1727 case PPCISD::SRA: return "PPCISD::SRA";
1728 case PPCISD::SHL: return "PPCISD::SHL";
1729 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1730 case PPCISD::CALL: return "PPCISD::CALL";
1731 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1732 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1733 case PPCISD::CALL_RM:
1734 return "PPCISD::CALL_RM";
1736 return "PPCISD::CALL_NOP_RM";
1738 return "PPCISD::CALL_NOTOC_RM";
1739 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1740 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1741 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1742 case PPCISD::BCTRL_RM:
1743 return "PPCISD::BCTRL_RM";
1745 return "PPCISD::BCTRL_LOAD_TOC_RM";
1746 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1747 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1748 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1749 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1750 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1751 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1752 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1753 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1754 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1755 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1757 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1759 return "PPCISD::ANDI_rec_1_EQ_BIT";
1761 return "PPCISD::ANDI_rec_1_GT_BIT";
1762 case PPCISD::VCMP: return "PPCISD::VCMP";
1763 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1764 case PPCISD::LBRX: return "PPCISD::LBRX";
1765 case PPCISD::STBRX: return "PPCISD::STBRX";
1766 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1767 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1768 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1769 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1770 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1771 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1772 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1773 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1774 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1776 return "PPCISD::ST_VSR_SCAL_INT";
1777 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1778 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1779 case PPCISD::BDZ: return "PPCISD::BDZ";
1780 case PPCISD::MFFS: return "PPCISD::MFFS";
1781 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1782 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1783 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1784 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1785 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1786 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1787 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1788 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1789 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1790 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1791 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1792 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1793 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1794 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1795 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1796 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1797 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1798 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1799 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1800 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1801 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1802 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1803 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1805 return "PPCISD::PADDI_DTPREL";
1806 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1807 case PPCISD::SC: return "PPCISD::SC";
1808 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1809 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1810 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1811 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1812 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1813 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1814 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1815 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1816 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1817 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1818 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1819 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1821 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1823 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1824 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1825 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1826 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1827 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1828 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1829 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1830 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1831 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1833 return "PPCISD::STRICT_FADDRTZ";
1835 return "PPCISD::STRICT_FCTIDZ";
1837 return "PPCISD::STRICT_FCTIWZ";
1839 return "PPCISD::STRICT_FCTIDUZ";
1841 return "PPCISD::STRICT_FCTIWUZ";
1843 return "PPCISD::STRICT_FCFID";
1845 return "PPCISD::STRICT_FCFIDU";
1847 return "PPCISD::STRICT_FCFIDS";
1849 return "PPCISD::STRICT_FCFIDUS";
1850 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1851 case PPCISD::STORE_COND:
1852 return "PPCISD::STORE_COND";
1853 case PPCISD::SETBC:
1854 return "PPCISD::SETBC";
1855 case PPCISD::SETBCR:
1856 return "PPCISD::SETBCR";
1857 }
1858 return nullptr;
1859}
1860
1862 EVT VT) const {
1863 if (!VT.isVector())
1864 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1865
1867}
1868
1870 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1871 return true;
1872}
1873
1874//===----------------------------------------------------------------------===//
1875// Node matching predicates, for use by the tblgen matching code.
1876//===----------------------------------------------------------------------===//
1877
1878/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1880 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1881 return CFP->getValueAPF().isZero();
1882 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1883 // Maybe this has already been legalized into the constant pool?
1884 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1885 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1886 return CFP->getValueAPF().isZero();
1887 }
1888 return false;
1889}
1890
1891/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1892/// true if Op is undef or if it matches the specified value.
1893static bool isConstantOrUndef(int Op, int Val) {
1894 return Op < 0 || Op == Val;
1895}
1896
1897/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1898/// VPKUHUM instruction.
1899/// The ShuffleKind distinguishes between big-endian operations with
1900/// two different inputs (0), either-endian operations with two identical
1901/// inputs (1), and little-endian operations with two different inputs (2).
1902/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1904 SelectionDAG &DAG) {
1905 bool IsLE = DAG.getDataLayout().isLittleEndian();
1906 if (ShuffleKind == 0) {
1907 if (IsLE)
1908 return false;
1909 for (unsigned i = 0; i != 16; ++i)
1910 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1911 return false;
1912 } else if (ShuffleKind == 2) {
1913 if (!IsLE)
1914 return false;
1915 for (unsigned i = 0; i != 16; ++i)
1916 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1917 return false;
1918 } else if (ShuffleKind == 1) {
1919 unsigned j = IsLE ? 0 : 1;
1920 for (unsigned i = 0; i != 8; ++i)
1921 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1922 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1923 return false;
1924 }
1925 return true;
1926}
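// Illustrative masks accepted above (derived from the checks): with
// ShuffleKind == 0 on big-endian targets the mask must be <1,3,5,...,31>,
// i.e. the odd bytes of the two concatenated inputs; with ShuffleKind == 2 on
// little-endian targets it is <0,2,4,...,30>.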
1927
1928/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1929/// VPKUWUM instruction.
1930/// The ShuffleKind distinguishes between big-endian operations with
1931/// two different inputs (0), either-endian operations with two identical
1932/// inputs (1), and little-endian operations with two different inputs (2).
1933/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1935 SelectionDAG &DAG) {
1936 bool IsLE = DAG.getDataLayout().isLittleEndian();
1937 if (ShuffleKind == 0) {
1938 if (IsLE)
1939 return false;
1940 for (unsigned i = 0; i != 16; i += 2)
1941 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1942 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1943 return false;
1944 } else if (ShuffleKind == 2) {
1945 if (!IsLE)
1946 return false;
1947 for (unsigned i = 0; i != 16; i += 2)
1948 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1949 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1950 return false;
1951 } else if (ShuffleKind == 1) {
1952 unsigned j = IsLE ? 0 : 2;
1953 for (unsigned i = 0; i != 8; i += 2)
1954 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1955 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1956 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1957 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1958 return false;
1959 }
1960 return true;
1961}
1962
1963/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1964/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1965/// current subtarget.
1966///
1967/// The ShuffleKind distinguishes between big-endian operations with
1968/// two different inputs (0), either-endian operations with two identical
1969/// inputs (1), and little-endian operations with two different inputs (2).
1970/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1972 SelectionDAG &DAG) {
1973 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1974 if (!Subtarget.hasP8Vector())
1975 return false;
1976
1977 bool IsLE = DAG.getDataLayout().isLittleEndian();
1978 if (ShuffleKind == 0) {
1979 if (IsLE)
1980 return false;
1981 for (unsigned i = 0; i != 16; i += 4)
1982 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1983 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1984 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1985 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1986 return false;
1987 } else if (ShuffleKind == 2) {
1988 if (!IsLE)
1989 return false;
1990 for (unsigned i = 0; i != 16; i += 4)
1991 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1992 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1993 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1994 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1995 return false;
1996 } else if (ShuffleKind == 1) {
1997 unsigned j = IsLE ? 0 : 4;
1998 for (unsigned i = 0; i != 8; i += 4)
1999 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
2000 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
2001 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
2002 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
2003 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
2004 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
2005 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
2006 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
2007 return false;
2008 }
2009 return true;
2010}
2011
2012/// isVMerge - Common function, used to match vmrg* shuffles.
2013///
2014static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2015 unsigned LHSStart, unsigned RHSStart) {
2016 if (N->getValueType(0) != MVT::v16i8)
2017 return false;
2018 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2019 "Unsupported merge size!");
2020
2021 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2022 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2023 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2024 LHSStart+j+i*UnitSize) ||
2025 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2026 RHSStart+j+i*UnitSize))
2027 return false;
2028 }
2029 return true;
2030}
2031
2032/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2033/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2034/// The ShuffleKind distinguishes between big-endian merges with two
2035/// different inputs (0), either-endian merges with two identical inputs (1),
2036/// and little-endian merges with two different inputs (2). For the latter,
2037/// the input operands are swapped (see PPCInstrAltivec.td).
2039 unsigned ShuffleKind, SelectionDAG &DAG) {
2040 if (DAG.getDataLayout().isLittleEndian()) {
2041 if (ShuffleKind == 1) // unary
2042 return isVMerge(N, UnitSize, 0, 0);
2043 else if (ShuffleKind == 2) // swapped
2044 return isVMerge(N, UnitSize, 0, 16);
2045 else
2046 return false;
2047 } else {
2048 if (ShuffleKind == 1) // unary
2049 return isVMerge(N, UnitSize, 8, 8);
2050 else if (ShuffleKind == 0) // normal
2051 return isVMerge(N, UnitSize, 8, 24);
2052 else
2053 return false;
2054 }
2055}
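// Illustrative mask (derived from isVMerge above): a big-endian VMRGLW match
// (UnitSize == 4, ShuffleKind == 0) requires the mask
// <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>, i.e. the words from the
// second half of each input, interleaved.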
2056
2057/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2058/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2059/// The ShuffleKind distinguishes between big-endian merges with two
2060/// different inputs (0), either-endian merges with two identical inputs (1),
2061/// and little-endian merges with two different inputs (2). For the latter,
2062/// the input operands are swapped (see PPCInstrAltivec.td).
2064 unsigned ShuffleKind, SelectionDAG &DAG) {
2065 if (DAG.getDataLayout().isLittleEndian()) {
2066 if (ShuffleKind == 1) // unary
2067 return isVMerge(N, UnitSize, 8, 8);
2068 else if (ShuffleKind == 2) // swapped
2069 return isVMerge(N, UnitSize, 8, 24);
2070 else
2071 return false;
2072 } else {
2073 if (ShuffleKind == 1) // unary
2074 return isVMerge(N, UnitSize, 0, 0);
2075 else if (ShuffleKind == 0) // normal
2076 return isVMerge(N, UnitSize, 0, 16);
2077 else
2078 return false;
2079 }
2080}
2081
2082/**
2083 * Common function used to match vmrgew and vmrgow shuffles
2084 *
2085 * The indexOffset determines whether to look for even or odd words in
2086 * the shuffle mask. This is based on the endianness of the target
2087 * machine.
2088 * - Little Endian:
2089 * - Use offset of 0 to check for odd elements
2090 * - Use offset of 4 to check for even elements
2091 * - Big Endian:
2092 * - Use offset of 0 to check for even elements
2093 * - Use offset of 4 to check for odd elements
2094 * A detailed description of the vector element ordering for little endian and
2095 * big endian can be found at
2096 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2097 * Targeting your applications - what little endian and big endian IBM XL C/C++
2098 * compiler differences mean to you
2099 *
2100 * The mask to the shuffle vector instruction specifies the indices of the
2101 * elements from the two input vectors to place in the result. The elements are
2102 * numbered in array-access order, starting with the first vector. These vectors
2103 * are always of type v16i8, thus each vector will contain 16 elements, each
2104 * 8 bits in size. More info on the shuffle vector can be found in the
2105 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2106 * Language Reference.
2107 *
2108 * The RHSStartValue indicates whether the same input vectors are used (unary)
2109 * or two different input vectors are used, based on the following:
2110 * - If the instruction uses the same vector for both inputs, the range of the
2111 * indices will be 0 to 15. In this case, the RHSStart value passed should
2112 * be 0.
2113 * - If the instruction has two different vectors then the range of the
2114 * indices will be 0 to 31. In this case, the RHSStart value passed should
2115 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2116 * to 31 specify elements in the second vector).
2117 *
2118 * \param[in] N The shuffle vector SD Node to analyze
2119 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2120 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2121 * vector to the shuffle_vector instruction
2122 * \return true iff this shuffle vector represents an even or odd word merge
2123 */
2124static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2125 unsigned RHSStartValue) {
2126 if (N->getValueType(0) != MVT::v16i8)
2127 return false;
2128
2129 for (unsigned i = 0; i < 2; ++i)
2130 for (unsigned j = 0; j < 4; ++j)
2131 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2132 i*RHSStartValue+j+IndexOffset) ||
2133 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2134 i*RHSStartValue+j+IndexOffset+8))
2135 return false;
2136 return true;
2137}
2138
2139/**
2140 * Determine if the specified shuffle mask is suitable for the vmrgew or
2141 * vmrgow instructions.
2142 *
2143 * \param[in] N The shuffle vector SD Node to analyze
2144 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2145 * \param[in] ShuffleKind Identify the type of merge:
2146 * - 0 = big-endian merge with two different inputs;
2147 * - 1 = either-endian merge with two identical inputs;
2148 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2149 * little-endian merges).
2150 * \param[in] DAG The current SelectionDAG
2151 * \return true iff this shuffle mask represents the requested even or odd word merge
2152 */
2154 unsigned ShuffleKind, SelectionDAG &DAG) {
2155 if (DAG.getDataLayout().isLittleEndian()) {
2156 unsigned indexOffset = CheckEven ? 4 : 0;
2157 if (ShuffleKind == 1) // Unary
2158 return isVMerge(N, indexOffset, 0);
2159 else if (ShuffleKind == 2) // swapped
2160 return isVMerge(N, indexOffset, 16);
2161 else
2162 return false;
2163 }
2164 else {
2165 unsigned indexOffset = CheckEven ? 0 : 4;
2166 if (ShuffleKind == 1) // Unary
2167 return isVMerge(N, indexOffset, 0);
2168 else if (ShuffleKind == 0) // Normal
2169 return isVMerge(N, indexOffset, 16);
2170 else
2171 return false;
2172 }
2173 return false;
2174}
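// Illustrative mask (derived from isVMerge above): an even-word merge of two
// different inputs on a big-endian target (CheckEven == true, ShuffleKind == 0)
// requires the mask <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>, i.e.
// words 0 and 2 of each input, interleaved.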
2175
2176/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2177/// amount, otherwise return -1.
2178/// The ShuffleKind distinguishes between big-endian operations with two
2179/// different inputs (0), either-endian operations with two identical inputs
2180/// (1), and little-endian operations with two different inputs (2). For the
2181/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2182int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2183 SelectionDAG &DAG) {
2184 if (N->getValueType(0) != MVT::v16i8)
2185 return -1;
2186
2187 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2188
2189 // Find the first non-undef value in the shuffle mask.
2190 unsigned i;
2191 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2192 /*search*/;
2193
2194 if (i == 16) return -1; // all undef.
2195
2196 // Otherwise, check to see if the rest of the elements are consecutively
2197 // numbered from this value.
2198 unsigned ShiftAmt = SVOp->getMaskElt(i);
2199 if (ShiftAmt < i) return -1;
2200
2201 ShiftAmt -= i;
2202 bool isLE = DAG.getDataLayout().isLittleEndian();
2203
2204 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2205 // Check the rest of the elements to see if they are consecutive.
2206 for (++i; i != 16; ++i)
2207 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2208 return -1;
2209 } else if (ShuffleKind == 1) {
2210 // Check the rest of the elements to see if they are consecutive.
2211 for (++i; i != 16; ++i)
2212 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2213 return -1;
2214 } else
2215 return -1;
2216
2217 if (isLE)
2218 ShiftAmt = 16 - ShiftAmt;
2219
2220 return ShiftAmt;
2221}
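// Worked example (illustrative): with two distinct inputs on a big-endian
// target (ShuffleKind == 0), the consecutive mask {3,4,5,...,18} is a vsldoi
// by 3 bytes and this routine returns 3. On little-endian (ShuffleKind == 2)
// the same consecutive pattern is reported as 16 - 3 = 13, matching the
// swapped-operand handling described above (see PPCInstrAltivec.td).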
2222
2223/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2224/// specifies a splat of a single element that is suitable for input to
2225/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2226bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2227 EVT VT = N->getValueType(0);
2228 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2229 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2230
2231 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2232 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2233
2234 // The consecutive indices need to specify an element, not part of two
2235 // different elements. So abandon ship early if this isn't the case.
2236 if (N->getMaskElt(0) % EltSize != 0)
2237 return false;
2238
2239 // This is a splat operation if each element of the permute is the same, and
2240 // if the value doesn't reference the second vector.
2241 unsigned ElementBase = N->getMaskElt(0);
2242
2243 // FIXME: Handle UNDEF elements too!
2244 if (ElementBase >= 16)
2245 return false;
2246
2247 // Check that the indices are consecutive, in the case of a multi-byte element
2248 // splatted with a v16i8 mask.
2249 for (unsigned i = 1; i != EltSize; ++i)
2250 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2251 return false;
2252
2253 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2254 if (N->getMaskElt(i) < 0) continue;
2255 for (unsigned j = 0; j != EltSize; ++j)
2256 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2257 return false;
2258 }
2259 return true;
2260}
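// Worked example (illustrative): a splat of word element 1 (EltSize == 4)
// appears as the v16i8 mask {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}: the first
// EltSize entries are consecutive and every later group repeats them.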
2261
2262/// Check that the mask is shuffling N byte elements. Within each N byte
2263/// element of the mask, the indices could be either in increasing or
2264/// decreasing order as long as they are consecutive.
2265/// \param[in] N the shuffle vector SD Node to analyze
2266/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2267/// Word/DoubleWord/QuadWord).
2268/// \param[in] StepLen the delta indices number among the N byte element, if
2269/// the mask is in increasing/decreasing order then it is 1/-1.
2270/// \return true iff the mask is shuffling N byte elements.
2271static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2272 int StepLen) {
2273 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2274 "Unexpected element width.");
2275 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2276
2277 unsigned NumOfElem = 16 / Width;
2278 unsigned MaskVal[16]; // Width is never greater than 16
2279 for (unsigned i = 0; i < NumOfElem; ++i) {
2280 MaskVal[0] = N->getMaskElt(i * Width);
2281 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2282 return false;
2283 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2284 return false;
2285 }
2286
2287 for (unsigned int j = 1; j < Width; ++j) {
2288 MaskVal[j] = N->getMaskElt(i * Width + j);
2289 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2290 return false;
2291 }
2292 }
2293 }
2294
2295 return true;
2296}
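// Worked examples (illustrative): with Width == 4 and StepLen == 1 a mask such
// as {8,9,10,11, 0,1,2,3, 20,21,22,23, 4,5,6,7} qualifies (each word's bytes
// ascend from a word-aligned index); with StepLen == -1 a byte-reversed word
// pattern such as {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12} qualifies.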
2297
2298bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2299 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2300 if (!isNByteElemShuffleMask(N, 4, 1))
2301 return false;
2302
2303 // Now we look at mask elements 0,4,8,12
2304 unsigned M0 = N->getMaskElt(0) / 4;
2305 unsigned M1 = N->getMaskElt(4) / 4;
2306 unsigned M2 = N->getMaskElt(8) / 4;
2307 unsigned M3 = N->getMaskElt(12) / 4;
2308 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2309 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2310
2311 // Below, let H and L be arbitrary elements of the shuffle mask
2312 // where H is in the range [4,7] and L is in the range [0,3].
2313 // H, 1, 2, 3 or L, 5, 6, 7
2314 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2315 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2316 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2317 InsertAtByte = IsLE ? 12 : 0;
2318 Swap = M0 < 4;
2319 return true;
2320 }
2321 // 0, H, 2, 3 or 4, L, 6, 7
2322 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2323 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2324 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2325 InsertAtByte = IsLE ? 8 : 4;
2326 Swap = M1 < 4;
2327 return true;
2328 }
2329 // 0, 1, H, 3 or 4, 5, L, 7
2330 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2331 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2332 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2333 InsertAtByte = IsLE ? 4 : 8;
2334 Swap = M2 < 4;
2335 return true;
2336 }
2337 // 0, 1, 2, H or 4, 5, 6, L
2338 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2339 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2340 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2341 InsertAtByte = IsLE ? 0 : 12;
2342 Swap = M3 < 4;
2343 return true;
2344 }
2345
2346 // If both vector operands for the shuffle are the same vector, the mask will
2347 // contain only elements from the first one and the second one will be undef.
2348 if (N->getOperand(1).isUndef()) {
2349 ShiftElts = 0;
2350 Swap = true;
2351 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2352 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2353 InsertAtByte = IsLE ? 12 : 0;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2357 InsertAtByte = IsLE ? 8 : 4;
2358 return true;
2359 }
2360 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2361 InsertAtByte = IsLE ? 4 : 8;
2362 return true;
2363 }
2364 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2365 InsertAtByte = IsLE ? 0 : 12;
2366 return true;
2367 }
2368 }
2369
2370 return false;
2371}
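// Worked example (illustrative): the word-level mask {4,1,2,3} - as bytes,
// {16,17,18,19, 4,5,6,7, 8,9,10,11, 12,13,14,15} - matches the "H, 1, 2, 3"
// case above: word 0 of the second input is inserted into word 0 of the first,
// with ShiftElts and InsertAtByte taken from the endianness tables.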
2372
2373bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2374 bool &Swap, bool IsLE) {
2375 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2376 // Ensure each byte index of the word is consecutive.
2377 if (!isNByteElemShuffleMask(N, 4, 1))
2378 return false;
2379
2380 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2381 unsigned M0 = N->getMaskElt(0) / 4;
2382 unsigned M1 = N->getMaskElt(4) / 4;
2383 unsigned M2 = N->getMaskElt(8) / 4;
2384 unsigned M3 = N->getMaskElt(12) / 4;
2385
2386 // If both vector operands for the shuffle are the same vector, the mask will
2387 // contain only elements from the first one and the second one will be undef.
2388 if (N->getOperand(1).isUndef()) {
2389 assert(M0 < 4 && "Indexing into an undef vector?");
2390 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2391 return false;
2392
2393 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2394 Swap = false;
2395 return true;
2396 }
2397
2398 // Ensure each word index of the ShuffleVector Mask is consecutive.
2399 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2400 return false;
2401
2402 if (IsLE) {
2403 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2404 // Input vectors don't need to be swapped if the leading element
2405 // of the result is one of the 3 left elements of the second vector
2406 // (or if there is no shift to be done at all).
2407 Swap = false;
2408 ShiftElts = (8 - M0) % 8;
2409 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2410 // Input vectors need to be swapped if the leading element
2411 // of the result is one of the 3 left elements of the first vector
2412 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2413 Swap = true;
2414 ShiftElts = (4 - M0) % 4;
2415 }
2416
2417 return true;
2418 } else { // BE
2419 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2420 // Input vectors don't need to be swapped if the leading element
2421 // of the result is one of the 4 elements of the first vector.
2422 Swap = false;
2423 ShiftElts = M0;
2424 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2425 // Input vectors need to be swapped if the leading element
2426 // of the result is one of the 4 elements of the right vector.
2427 Swap = true;
2428 ShiftElts = M0 - 4;
2429 }
2430
2431 return true;
2432 }
2433}
2434
2435static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2436 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2437
2438 if (!isNByteElemShuffleMask(N, Width, -1))
2439 return false;
2440
2441 for (int i = 0; i < 16; i += Width)
2442 if (N->getMaskElt(i) != i + Width - 1)
2443 return false;
2444
2445 return true;
2446}
2447
2448bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, 2);
2450}
2451
2452bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, 4);
2454}
2455
2456bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2457 return isXXBRShuffleMaskHelper(N, 8);
2458}
2459
2460bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2461 return isXXBRShuffleMaskHelper(N, 16);
2462}
2463
2464/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2465/// if the inputs to the instruction should be swapped and set \p DM to the
2466/// value for the immediate.
2467/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2468/// AND element 0 of the result comes from the first input (LE) or second input
2469/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2470/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2471/// mask.
2472bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2473 bool &Swap, bool IsLE) {
2474 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2475
2476 // Ensure each byte index of the double word is consecutive.
2477 if (!isNByteElemShuffleMask(N, 8, 1))
2478 return false;
2479
2480 unsigned M0 = N->getMaskElt(0) / 8;
2481 unsigned M1 = N->getMaskElt(8) / 8;
2482 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2483
2484 // If both vector operands for the shuffle are the same vector, the mask will
2485 // contain only elements from the first one and the second one will be undef.
2486 if (N->getOperand(1).isUndef()) {
2487 if ((M0 | M1) < 2) {
2488 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2489 Swap = false;
2490 return true;
2491 } else
2492 return false;
2493 }
2494
2495 if (IsLE) {
2496 if (M0 > 1 && M1 < 2) {
2497 Swap = false;
2498 } else if (M0 < 2 && M1 > 1) {
2499 M0 = (M0 + 2) % 4;
2500 M1 = (M1 + 2) % 4;
2501 Swap = true;
2502 } else
2503 return false;
2504
2505 // Note: if control flow comes here that means Swap is already set above
2506 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2507 return true;
2508 } else { // BE
2509 if (M0 < 2 && M1 > 1) {
2510 Swap = false;
2511 } else if (M0 > 1 && M1 < 2) {
2512 M0 = (M0 + 2) % 4;
2513 M1 = (M1 + 2) % 4;
2514 Swap = true;
2515 } else
2516 return false;
2517
2518 // Note: if control flow comes here that means Swap is already set above
2519 DM = (M0 << 1) + (M1 & 1);
2520 return true;
2521 }
2522}
2523
2524
2525/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2526/// appropriate for PPC mnemonics (which have a big endian bias - namely
2527/// elements are counted from the left of the vector register).
2528unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2529 SelectionDAG &DAG) {
2530 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2531 assert(isSplatShuffleMask(SVOp, EltSize));
2532 EVT VT = SVOp->getValueType(0);
2533
2534 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2535 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2536 : SVOp->getMaskElt(0);
2537
2538 if (DAG.getDataLayout().isLittleEndian())
2539 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2540 else
2541 return SVOp->getMaskElt(0) / EltSize;
2542}
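// Worked example (illustrative): for a v4i32 splat of element 1 (EltSize == 4,
// mask element 0 == 4) on little-endian, the returned index is
// (16 / 4) - 1 - (4 / 4) = 2, i.e. the element counted from the left of the
// register, which is the numbering vspltw/xxspltw expect.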
2543
2544/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2545/// by using a vspltis[bhw] instruction of the specified element size, return
2546/// the constant being splatted. The ByteSize field indicates the number of
2547/// bytes of each element [124] -> [bhw].
2548SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2549 SDValue OpVal;
2550
2551 // If ByteSize of the splat is bigger than the element size of the
2552 // build_vector, then we have a case where we are checking for a splat where
2553 // multiple elements of the buildvector are folded together into a single
2554 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2555 unsigned EltSize = 16/N->getNumOperands();
2556 if (EltSize < ByteSize) {
2557 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2558 SDValue UniquedVals[4];
2559 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2560
2561 // See if all of the elements in the buildvector agree across.
2562 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2563 if (N->getOperand(i).isUndef()) continue;
2564 // If the element isn't a constant, bail fully out.
2565 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2566
2567 if (!UniquedVals[i&(Multiple-1)].getNode())
2568 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2569 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2570 return SDValue(); // no match.
2571 }
2572
2573 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2574 // either constant or undef values that are identical for each chunk. See
2575 // if these chunks can form into a larger vspltis*.
2576
2577 // Check to see if all of the leading entries are either 0 or -1. If
2578 // neither, then this won't fit into the immediate field.
2579 bool LeadingZero = true;
2580 bool LeadingOnes = true;
2581 for (unsigned i = 0; i != Multiple-1; ++i) {
2582 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2583
2584 LeadingZero &= isNullConstant(UniquedVals[i]);
2585 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2586 }
2587 // Finally, check the least significant entry.
2588 if (LeadingZero) {
2589 if (!UniquedVals[Multiple-1].getNode())
2590 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2591 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2592 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2593 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2594 }
2595 if (LeadingOnes) {
2596 if (!UniquedVals[Multiple-1].getNode())
2597 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2598 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2599 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2600 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2601 }
2602
2603 return SDValue();
2604 }
2605
2606 // Check to see if this buildvec has a single non-undef value in its elements.
2607 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2608 if (N->getOperand(i).isUndef()) continue;
2609 if (!OpVal.getNode())
2610 OpVal = N->getOperand(i);
2611 else if (OpVal != N->getOperand(i))
2612 return SDValue();
2613 }
2614
2615 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2616
2617 unsigned ValSizeInBytes = EltSize;
2618 uint64_t Value = 0;
2619 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2620 Value = CN->getZExtValue();
2621 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2622 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2623 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2624 }
2625
2626 // If the splat value is larger than the element value, then we can never do
2627 // this splat. The only case that we could fit the replicated bits into our
2628 // immediate field for would be zero, and we prefer to use vxor for it.
2629 if (ValSizeInBytes < ByteSize) return SDValue();
2630
2631 // If the element value is larger than the splat value, check if it consists
2632 // of a repeated bit pattern of size ByteSize.
2633 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2634 return SDValue();
2635
2636 // Properly sign extend the value.
2637 int MaskVal = SignExtend32(Value, ByteSize * 8);
2638
2639 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2640 if (MaskVal == 0) return SDValue();
2641
2642 // Finally, if this value fits in a 5 bit sext field, return it
2643 if (SignExtend32<5>(MaskVal) == MaskVal)
2644 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2645 return SDValue();
2646}
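// Worked example (illustrative): a v8i16 build_vector whose operands are all
// the constant 3, queried with ByteSize == 2, returns the target constant 3,
// which the caller can typically materialize with a single vspltish 3.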
2647
2648//===----------------------------------------------------------------------===//
2649// Addressing Mode Selection
2650//===----------------------------------------------------------------------===//
2651
2652/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2653/// or 64-bit immediate, and if the value can be accurately represented as a
2654/// sign extension from a 16-bit value. If so, this returns true and the
2655/// immediate.
2656bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2657 if (!isa<ConstantSDNode>(N))
2658 return false;
2659
2660 Imm = (int16_t)N->getAsZExtVal();
2661 if (N->getValueType(0) == MVT::i32)
2662 return Imm == (int32_t)N->getAsZExtVal();
2663 else
2664 return Imm == (int64_t)N->getAsZExtVal();
2665}
2666bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2667 return isIntS16Immediate(Op.getNode(), Imm);
2668}
2669
2670/// Used when computing address flags for selecting loads and stores.
2671/// If we have an OR, check if the LHS and RHS are provably disjoint.
2672/// An OR of two provably disjoint values is equivalent to an ADD.
2673/// Most PPC load/store instructions compute the effective address as a sum,
2674/// so doing this conversion is useful.
2675static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2676 if (N.getOpcode() != ISD::OR)
2677 return false;
2678 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2679 if (!LHSKnown.Zero.getBoolValue())
2680 return false;
2681 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2682 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2683}
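// Illustrative case: for (or (shl %x, 4), 3) the low four bits of the LHS are
// known zero and all bits of the RHS other than the low two are known zero,
// so every bit position is known zero on at least one side and the OR can be
// treated as an ADD when forming addresses.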
2684
2685/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2686/// be represented as an indexed [r+r] operation.
2687bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2688 SDValue &Index,
2689 SelectionDAG &DAG) const {
2690 for (SDNode *U : N->users()) {
2691 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2692 if (Memop->getMemoryVT() == MVT::f64) {
2693 Base = N.getOperand(0);
2694 Index = N.getOperand(1);
2695 return true;
2696 }
2697 }
2698 }
2699 return false;
2700}
2701
2702/// isIntS34Immediate - This method tests if value of node given can be
2703/// accurately represented as a sign extension from a 34-bit value. If so,
2704/// this returns true and the immediate.
2705bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2706 if (!isa<ConstantSDNode>(N))
2707 return false;
2708
2709 Imm = (int64_t)cast<ConstantSDNode>(N)->getSExtValue();
2710 return isInt<34>(Imm);
2711}
2712bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2713 return isIntS34Immediate(Op.getNode(), Imm);
2714}
2715
2716/// SelectAddressRegReg - Given the specified address, check to see if it
2717/// can be represented as an indexed [r+r] operation. Returns false if it
2718/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2719/// non-zero and N can be represented by a base register plus a signed 16-bit
2720/// displacement, make a more precise judgement by checking (displacement % \p
2721/// EncodingAlignment).
2722bool PPCTargetLowering::SelectAddressRegReg(
2723 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2724 MaybeAlign EncodingAlignment) const {
2725 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2726 // a [pc+imm].
2727 if (SelectAddressPCRel(N, Base))
2728 return false;
2729
2730 int16_t Imm = 0;
2731 if (N.getOpcode() == ISD::ADD) {
2732 // SPE f64 loads/stores cannot encode a 16-bit offset; they only support
2733 // 8-bit offsets, so prefer the EVX [r+r] form for those accesses.
2734 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2735 return true;
2736 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2737 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2738 return false; // r+i
2739 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2740 return false; // r+i
2741
2742 Base = N.getOperand(0);
2743 Index = N.getOperand(1);
2744 return true;
2745 } else if (N.getOpcode() == ISD::OR) {
2746 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2747 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2748 return false; // r+i can fold it if we can.
2749
2750 // If this is an or of disjoint bitfields, we can codegen this as an add
2751 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2752 // disjoint.
2753 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2754
2755 if (LHSKnown.Zero.getBoolValue()) {
2756 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2757 // If all of the bits are known zero on the LHS or RHS, the add won't
2758 // carry.
2759 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2760 Base = N.getOperand(0);
2761 Index = N.getOperand(1);
2762 return true;
2763 }
2764 }
2765 }
2766
2767 return false;
2768}
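// Illustrative behaviour: (add %X, 64) is rejected here (returns false) so the
// caller can use a D-form [r+imm] encoding, while (add %X, %Y) with a
// non-constant %Y is accepted and split into Base = %X, Index = %Y for an
// X-form [r+r] access.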
2769
2770// If we happen to be doing an i64 load or store into a stack slot that has
2771// less than a 4-byte alignment, then the frame-index elimination may need to
2772// use an indexed load or store instruction (because the offset may not be a
2773// multiple of 4). The extra register needed to hold the offset comes from the
2774// register scavenger, and it is possible that the scavenger will need to use
2775// an emergency spill slot. As a result, we need to make sure that a spill slot
2776// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2777// stack slot.
2778static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2779 // FIXME: This does not handle the LWA case.
2780 if (VT != MVT::i64)
2781 return;
2782
2783 // NOTE: We'll exclude negative FIs here, which come from argument
2784 // lowering, because there are no known test cases triggering this problem
2785 // using packed structures (or similar). We can remove this exclusion if
2786 // we find such a test case. The reason why this is so test-case driven is
2787 // because this entire 'fixup' is only to prevent crashes (from the
2788 // register scavenger) on not-really-valid inputs. For example, if we have:
2789 // %a = alloca i1
2790 // %b = bitcast i1* %a to i64*
2791 // store i64 0, i64* %b
2792 // then the store should really be marked as 'align 1', but is not. If it
2793 // were marked as 'align 1' then the indexed form would have been
2794 // instruction-selected initially, and the problem this 'fixup' is preventing
2795 // won't happen regardless.
2796 if (FrameIdx < 0)
2797 return;
2798
2799 MachineFunction &MF = DAG.getMachineFunction();
2800 MachineFrameInfo &MFI = MF.getFrameInfo();
2801
2802 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2803 return;
2804
2805 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2806 FuncInfo->setHasNonRISpills();
2807}
2808
2809/// Returns true if the address N can be represented by a base register plus
2810/// a signed 16-bit displacement [r+imm], and if it is not better
2811/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2812/// displacements that are multiples of that value.
2813bool PPCTargetLowering::SelectAddressRegImm(
2814 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2815 MaybeAlign EncodingAlignment) const {
2816 // FIXME dl should come from parent load or store, not from address
2817 SDLoc dl(N);
2818
2819 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2820 // a [pc+imm].
2821 if (SelectAddressPCRel(N, Disp))
2822 return false;
2823
2824 // If this can be more profitably realized as r+r, fail.
2825 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2826 return false;
2827
2828 if (N.getOpcode() == ISD::ADD) {
2829 int16_t imm = 0;
2830 if (isIntS16Immediate(N.getOperand(1), imm) &&
2831 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2832 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2833 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2834 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2835 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2836 } else {
2837 Base = N.getOperand(0);
2838 }
2839 return true; // [r+i]
2840 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2841 // Match LOAD (ADD (X, Lo(G))).
2842 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2843 "Cannot handle constant offsets yet!");
2844 Disp = N.getOperand(1).getOperand(0); // The global address.
2845 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2846 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2847 Disp.getOpcode() == ISD::TargetConstantPool ||
2848 Disp.getOpcode() == ISD::TargetJumpTable);
2849 Base = N.getOperand(0);
2850 return true; // [&g+r]
2851 }
2852 } else if (N.getOpcode() == ISD::OR) {
2853 int16_t imm = 0;
2854 if (isIntS16Immediate(N.getOperand(1), imm) &&
2855 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2856 // If this is an or of disjoint bitfields, we can codegen this as an add
2857 // (for better address arithmetic) if the LHS and RHS of the OR are
2858 // provably disjoint.
2859 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2860
2861 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2862 // If all of the bits are known zero on the LHS or RHS, the add won't
2863 // carry.
2864 if (FrameIndexSDNode *FI =
2865 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2866 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2867 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2868 } else {
2869 Base = N.getOperand(0);
2870 }
2871 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2872 return true;
2873 }
2874 }
2875 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2876 // Loading from a constant address.
2877
2878 // If this address fits entirely in a 16-bit sext immediate field, codegen
2879 // this as "d, 0"
2880 int16_t Imm;
2881 if (isIntS16Immediate(CN, Imm) &&
2882 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2883 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2884 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2885 CN->getValueType(0));
2886 return true;
2887 }
2888
2889 // Handle 32-bit sext immediates with LIS + addr mode.
2890 if ((CN->getValueType(0) == MVT::i32 ||
2891 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2892 (!EncodingAlignment ||
2893 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2894 int Addr = (int)CN->getZExtValue();
2895
2896 // Otherwise, break this down into an LIS + disp.
2897 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2898
2899 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2900 MVT::i32);
2901 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2902 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2903 return true;
2904 }
2905 }
2906
2907 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2908 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2909 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2910 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2911 } else
2912 Base = N;
2913 return true; // [r+0]
2914}
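// Illustrative behaviour: with EncodingAlignment == 4 (DS-form), an address of
// (add %X, 20) is accepted as Disp = 20, Base = %X, whereas (add %X, 18) is
// rejected so the misaligned offset is materialized and an X-form [r+r]
// access is used instead.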
2915
2916/// Similar to the 16-bit case but for instructions that take a 34-bit
2917/// displacement field (prefixed loads/stores).
2918bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2919 SDValue &Base,
2920 SelectionDAG &DAG) const {
2921 // Only on 64-bit targets.
2922 if (N.getValueType() != MVT::i64)
2923 return false;
2924
2925 SDLoc dl(N);
2926 int64_t Imm = 0;
2927
2928 if (N.getOpcode() == ISD::ADD) {
2929 if (!isIntS34Immediate(N.getOperand(1), Imm))
2930 return false;
2931 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2932 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2933 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2934 else
2935 Base = N.getOperand(0);
2936 return true;
2937 }
2938
2939 if (N.getOpcode() == ISD::OR) {
2940 if (!isIntS34Immediate(N.getOperand(1), Imm))
2941 return false;
2942 // If this is an or of disjoint bitfields, we can codegen this as an add
2943 // (for better address arithmetic) if the LHS and RHS of the OR are
2944 // provably disjoint.
2945 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2946 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2947 return false;
2948 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2949 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2950 else
2951 Base = N.getOperand(0);
2952 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2953 return true;
2954 }
2955
2956 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2957 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2958 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2959 return true;
2960 }
2961
2962 return false;
2963}
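// Illustrative behaviour: on targets with prefixed instructions, an address of
// (add %X, 100000) can be selected here as Disp = 100000, Base = %X, since the
// offset fits a signed 34-bit field (e.g. for plwz/pstd) even though it is far
// too large for a 16-bit D-form displacement.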
2964
2965/// SelectAddressRegRegOnly - Given the specified address, force it to be
2966/// represented as an indexed [r+r] operation.
2967bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2968 SDValue &Index,
2969 SelectionDAG &DAG) const {
2970 // Check to see if we can easily represent this as an [r+r] address. This
2971 // will fail if it thinks that the address is more profitably represented as
2972 // reg+imm, e.g. where imm = 0.
2973 if (SelectAddressRegReg(N, Base, Index, DAG))
2974 return true;
2975
2976 // If the address is the result of an add, we will utilize the fact that the
2977 // address calculation includes an implicit add. However, we can reduce
2978 // register pressure if we do not materialize a constant just for use as the
2979 // index register. We only fold the explicit add into the [r+r] form if it
2980 // is not an add of a value and a 16-bit signed constant where both
2980 // operands have a single use.
2981 int16_t imm = 0;
2982 if (N.getOpcode() == ISD::ADD &&
2983 (!isIntS16Immediate(N.getOperand(1), imm) ||
2984 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2985 Base = N.getOperand(0);
2986 Index = N.getOperand(1);
2987 return true;
2988 }
2989
2990 // Otherwise, do it the hard way, using R0 as the base register.
2991 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2992 N.getValueType());
2993 Index = N;
2994 return true;
2995}
2996
2997template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2998 Ty *PCRelCand = dyn_cast<Ty>(N);
2999 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
3000}
3001
3002/// Returns true if this address is a PC Relative address.
3003/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
3004/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
3005bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3006 // This is a materialize PC Relative node. Always select this as PC Relative.
3007 Base = N;
3008 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3009 return true;
3010 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3011 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3012 isValidPCRelNode<JumpTableSDNode>(N) ||
3013 isValidPCRelNode<BlockAddressSDNode>(N))
3014 return true;
3015 return false;
3016}
3017
3018/// Returns true if we should use a direct load into vector instruction
3019/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3020static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3021
3022 // If there are any other uses other than scalar to vector, then we should
3023 // keep it as a scalar load -> direct move pattern to prevent multiple
3024 // loads.
3025 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3026 if (!LD)
3027 return false;
3028
3029 EVT MemVT = LD->getMemoryVT();
3030 if (!MemVT.isSimple())
3031 return false;
3032 switch(MemVT.getSimpleVT().SimpleTy) {
3033 case MVT::i64:
3034 break;
3035 case MVT::i32:
3036 if (!ST.hasP8Vector())
3037 return false;
3038 break;
3039 case MVT::i16:
3040 case MVT::i8:
3041 if (!ST.hasP9Vector())
3042 return false;
3043 break;
3044 default:
3045 return false;
3046 }
3047
3048 SDValue LoadedVal(N, 0);
3049 if (!LoadedVal.hasOneUse())
3050 return false;
3051
3052 for (SDUse &Use : LD->uses())
3053 if (Use.getResNo() == 0 &&
3054 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3055 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3056 return false;
3057
3058 return true;
3059}
3060
3061/// getPreIndexedAddressParts - returns true by value, base pointer and
3062/// offset pointer and addressing mode by reference if the node's address
3063/// can be legally represented as pre-indexed load / store address.
3064bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3065 SDValue &Offset,
3066 ISD::MemIndexedMode &AM,
3067 SelectionDAG &DAG) const {
3068 if (DisablePPCPreinc) return false;
3069
3070 bool isLoad = true;
3071 SDValue Ptr;
3072 EVT VT;
3073 Align Alignment;
3074 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3075 Ptr = LD->getBasePtr();
3076 VT = LD->getMemoryVT();
3077 Alignment = LD->getAlign();
3078 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3079 Ptr = ST->getBasePtr();
3080 VT = ST->getMemoryVT();
3081 Alignment = ST->getAlign();
3082 isLoad = false;
3083 } else
3084 return false;
3085
3086 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3087 // instructions because we can fold these into a more efficient instruction
3088 // instead, (such as LXSD).
3089 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3090 return false;
3091 }
3092
3093 // PowerPC doesn't have preinc load/store instructions for vectors
3094 if (VT.isVector())
3095 return false;
3096
3097 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3098 // Common code will reject creating a pre-inc form if the base pointer
3099 // is a frame index, or if N is a store and the base pointer is either
3100 // the same as or a predecessor of the value being stored. Check for
3101 // those situations here, and try with swapped Base/Offset instead.
3102 bool Swap = false;
3103
3104 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3105 Swap = true;
3106 else if (!isLoad) {
3107 SDValue Val = cast<StoreSDNode>(N)->getValue();
3108 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3109 Swap = true;
3110 }
3111
3112 if (Swap)
3113 std::swap(Base, Offset);
3114
3115 AM = ISD::PRE_INC;
3116 return true;
3117 }
3118
3119 // LDU/STU can only handle immediates that are a multiple of 4.
3120 if (VT != MVT::i64) {
3121 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3122 return false;
3123 } else {
3124 // LDU/STU need an address with at least 4-byte alignment.
3125 if (Alignment < Align(4))
3126 return false;
3127
3128 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3129 return false;
3130 }
3131
3132 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3133 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3134 // sext i32 to i64 when addr mode is r+i.
3135 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3136 LD->getExtensionType() == ISD::SEXTLOAD &&
3137 isa<ConstantSDNode>(Offset))
3138 return false;
3139 }
3140
3141 AM = ISD::PRE_INC;
3142 return true;
3143}
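// Illustrative behaviour: a load from (add %r3, 16) whose value and updated
// pointer are both wanted can be selected as a pre-increment form such as
// lwzu r4, 16(r3), leaving r3 pointing at the accessed address, which is what
// the ISD::PRE_INC mode returned above requests.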
3144
3145//===----------------------------------------------------------------------===//
3146// LowerOperation implementation
3147//===----------------------------------------------------------------------===//
3148
3149/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3150/// and LoOpFlags to the target MO flags.
3151static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3152 unsigned &HiOpFlags, unsigned &LoOpFlags,
3153 const GlobalValue *GV = nullptr) {
3154 HiOpFlags = PPCII::MO_HA;
3155 LoOpFlags = PPCII::MO_LO;
3156
3157 // Don't use the pic base if not in PIC relocation model.
3158 if (IsPIC) {
3159 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3160 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3161 }
3162}
3163
3164static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3165 SelectionDAG &DAG) {
3166 SDLoc DL(HiPart);
3167 EVT PtrVT = HiPart.getValueType();
3168 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3169
3170 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3171 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3172
3173 // With PIC, the first instruction is actually "GR+hi(&G)".
3174 if (isPIC)
3175 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3176 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3177
3178 // Generate non-pic code that has direct accesses to the constant pool.
3179 // The address of the global is just (hi(&g)+lo(&g)).
3180 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3181}
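// Illustrative expansion: for a non-PIC reference to a symbol g, the Hi/Lo
// pair produced here typically becomes
//   lis  r3, g@ha
//   addi r3, r3, g@l
// with the PIC form adding the GlobalBaseReg into the high part first.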
3182
3183static void setUsesTOCBasePtr(MachineFunction &MF) {
3184 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3185 FuncInfo->setUsesTOCBasePtr();
3186}
3187
3188static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3189 setUsesTOCBasePtr(DAG.getMachineFunction());
3190}
3191
3192SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3193 SDValue GA) const {
3194 EVT VT = Subtarget.getScalarIntVT();
3195 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3196 : Subtarget.isAIXABI()
3197 ? DAG.getRegister(PPC::R2, VT)
3198 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3199 SDValue Ops[] = { GA, Reg };
3200 return DAG.getMemIntrinsicNode(
3201 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3204}
3205
3206SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3207 SelectionDAG &DAG) const {
3208 EVT PtrVT = Op.getValueType();
3209 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3210 const Constant *C = CP->getConstVal();
3211
3212 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3213 // The actual address of the GlobalValue is stored in the TOC.
3214 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3215 if (Subtarget.isUsingPCRelativeCalls()) {
3216 SDLoc DL(CP);
3217 EVT Ty = getPointerTy(DAG.getDataLayout());
3218 SDValue ConstPool = DAG.getTargetConstantPool(
3219 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3220 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3221 }
3222 setUsesTOCBasePtr(DAG);
3223 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3224 return getTOCEntry(DAG, SDLoc(CP), GA);
3225 }
3226
3227 unsigned MOHiFlag, MOLoFlag;
3228 bool IsPIC = isPositionIndependent();
3229 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3230
3231 if (IsPIC && Subtarget.isSVR4ABI()) {
3232 SDValue GA =
3233 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3234 return getTOCEntry(DAG, SDLoc(CP), GA);
3235 }
3236
3237 SDValue CPIHi =
3238 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3239 SDValue CPILo =
3240 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3241 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3242}
3243
3244// For 64-bit PowerPC, prefer the more compact relative encodings.
3245// This trades 32 bits per jump table entry for one or two instructions
3246// on the jump site.
3247unsigned PPCTargetLowering::getJumpTableEncoding() const {
3248 if (isJumpTableRelative())
3249 return MachineJumpTableInfo::EK_LabelDifference32;
3250
3251 return TargetLowering::getJumpTableEncoding();
3252}
3253
3254bool PPCTargetLowering::isJumpTableRelative() const {
3255 if (UseAbsoluteJumpTables)
3256 return false;
3257 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3258 return true;
3259 return TargetLowering::isJumpTableRelative();
3260}
3261
3262SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3263 SelectionDAG &DAG) const {
3264 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3265 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3266
3267 switch (getTargetMachine().getCodeModel()) {
3268 case CodeModel::Small:
3269 case CodeModel::Medium:
3270 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3271 default:
3272 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3273 getPointerTy(DAG.getDataLayout()));
3274 }
3275}
3276
3277const MCExpr *
3278PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3279 unsigned JTI,
3280 MCContext &Ctx) const {
3281 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3282 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3283
3284 switch (getTargetMachine().getCodeModel()) {
3285 case CodeModel::Small:
3286 case CodeModel::Medium:
3287 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3288 default:
3289 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3290 }
3291}
3292
3293SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3294 EVT PtrVT = Op.getValueType();
3295 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3296
3297 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3298 if (Subtarget.isUsingPCRelativeCalls()) {
3299 SDLoc DL(JT);
3300 EVT Ty = getPointerTy(DAG.getDataLayout());
3301 SDValue GA =
3302 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3303 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3304 return MatAddr;
3305 }
3306
3307 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3308 // The actual address of the GlobalValue is stored in the TOC.
3309 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3310 setUsesTOCBasePtr(DAG);
3311 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3312 return getTOCEntry(DAG, SDLoc(JT), GA);
3313 }
3314
3315 unsigned MOHiFlag, MOLoFlag;
3316 bool IsPIC = isPositionIndependent();
3317 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3318
3319 if (IsPIC && Subtarget.isSVR4ABI()) {
3320 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3321 PPCII::MO_PIC_FLAG);
3322 return getTOCEntry(DAG, SDLoc(GA), GA);
3323 }
3324
3325 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3326 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3327 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3328}
3329
3330SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3331 SelectionDAG &DAG) const {
3332 EVT PtrVT = Op.getValueType();
3333 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3334 const BlockAddress *BA = BASDN->getBlockAddress();
3335
3336 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3337 if (Subtarget.isUsingPCRelativeCalls()) {
3338 SDLoc DL(BASDN);
3339 EVT Ty = getPointerTy(DAG.getDataLayout());
3340 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3341 PPCII::MO_PCREL_FLAG);
3342 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3343 return MatAddr;
3344 }
3345
3346 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3347 // The actual BlockAddress is stored in the TOC.
3348 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3349 setUsesTOCBasePtr(DAG);
3350 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3351 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3352 }
3353
3354 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3355 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3356 return getTOCEntry(
3357 DAG, SDLoc(BASDN),
3358 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3359
3360 unsigned MOHiFlag, MOLoFlag;
3361 bool IsPIC = isPositionIndependent();
3362 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3363 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3364 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3365 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3366}
3367
3368SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3369 SelectionDAG &DAG) const {
3370 if (Subtarget.isAIXABI())
3371 return LowerGlobalTLSAddressAIX(Op, DAG);
3372
3373 return LowerGlobalTLSAddressLinux(Op, DAG);
3374}
3375
3376/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3377/// and then apply the update.
3379 SelectionDAG &DAG,
3380 const TargetMachine &TM) {
3381 // Initialize TLS model opt setting lazily:
3382 // (1) Use initial-exec for single TLS var references within current function.
3383 // (2) Use local-dynamic for multiple TLS var references within current
3384 // function.
3385 PPCFunctionInfo *FuncInfo =
3387 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3389 // Iterate over all instructions within current function, collect all TLS
3390 // global variables (global variables taken as the first parameter to
3391 // Intrinsic::threadlocal_address).
3392 const Function &Func = DAG.getMachineFunction().getFunction();
3393 for (const BasicBlock &BB : Func)
3394 for (const Instruction &I : BB)
3395 if (I.getOpcode() == Instruction::Call)
3396 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3397 if (Function *CF = CI->getCalledFunction())
3398 if (CF->isDeclaration() &&
3399 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3400 if (const GlobalValue *GV =
3401 dyn_cast<GlobalValue>(I.getOperand(0))) {
3402 TLSModel::Model GVModel = TM.getTLSModel(GV);
3403 if (GVModel == TLSModel::LocalDynamic)
3404 TLSGV.insert(GV);
3405 }
3406
3407 unsigned TLSGVCnt = TLSGV.size();
3408 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3409 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3410 FuncInfo->setAIXFuncUseTLSIEForLD();
3412 }
3413
3414 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3415 LLVM_DEBUG(
3416 dbgs() << DAG.getMachineFunction().getName()
3417 << " function is using the TLS-IE model for TLS-LD access.\n");
3418 Model = TLSModel::InitialExec;
3419 }
3420}
3421
3422SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3423 SelectionDAG &DAG) const {
3424 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3425
3426 if (DAG.getTarget().useEmulatedTLS())
3427 report_fatal_error("Emulated TLS is not yet supported on AIX");
3428
3429 SDLoc dl(GA);
3430 const GlobalValue *GV = GA->getGlobal();
3431 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3432 bool Is64Bit = Subtarget.isPPC64();
3434
3435 // Apply update to the TLS model.
3436 if (Subtarget.hasAIXShLibTLSModelOpt())
3438
3439 // TLS variables are accessed through TOC entries.
3440 // To support this, set the DAG to use the TOC base pointer.
3441 setUsesTOCBasePtr(DAG);
3442
3443 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3444
3445 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3446 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3447 bool HasAIXSmallTLSGlobalAttr = false;
3448 SDValue VariableOffsetTGA =
3449 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3450 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3451 SDValue TLSReg;
3452
3453 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3454 if (GVar->hasAttribute("aix-small-tls"))
3455 HasAIXSmallTLSGlobalAttr = true;
3456
3457 if (Is64Bit) {
3458 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3459 // involves a load of the variable offset (from the TOC), followed by an
3460 // add of the loaded variable offset to R13 (the thread pointer).
3461 // This code sequence looks like:
3462 // ld reg1,var[TC](2)
3463 // add reg2, reg1, r13 // r13 contains the thread pointer
3464 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3465
3466 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3467 // global variable attribute, produce a faster access sequence for
3468 // local-exec TLS variables where the offset from the TLS base is encoded
3469 // as an immediate operand.
3470 //
3471 // We only utilize the faster local-exec access sequence when the TLS
3472 // variable has a size within the policy limit. We treat types that are
3473 // not sized or are empty as being over the policy size limit.
3474 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3475 IsTLSLocalExecModel) {
3476 Type *GVType = GV->getValueType();
3477 if (GVType->isSized() && !GVType->isEmptyTy() &&
3478 GV->getDataLayout().getTypeAllocSize(GVType) <=
3480 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3481 }
3482 } else {
3483 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3484 // involves loading the variable offset from the TOC, generating a call to
3485 // .__get_tpointer to get the thread pointer (which will be in R3), and
3486 // adding the two together:
3487 // lwz reg1,var[TC](2)
3488 // bla .__get_tpointer
3489 // add reg2, reg1, r3
3490 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3491
3492 // We do not implement the 32-bit version of the faster access sequence
3493 // for local-exec that is controlled by the -maix-small-local-exec-tls
3494 // option, or the "aix-small-tls" global variable attribute.
3495 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3496 report_fatal_error("The small-local-exec TLS access sequence is "
3497 "currently only supported on AIX (64-bit mode).");
3498 }
3499 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3500 }
3501
3502 if (Model == TLSModel::LocalDynamic) {
3503 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3504
3505 // We do not implement the 32-bit version of the faster access sequence
3506 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3507 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3508 report_fatal_error("The small-local-dynamic TLS access sequence is "
3509 "currently only supported on AIX (64-bit mode).");
3510
3511 // For local-dynamic on AIX, we need to generate one TOC entry for each
3512 // variable offset, and a single module-handle TOC entry for the entire
3513 // file.
3514
3515 SDValue VariableOffsetTGA =
3516 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3517 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3518
3520 GlobalVariable *TLSGV =
3521 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3522 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3524 assert(TLSGV && "Not able to create GV for _$TLSML.");
3525 SDValue ModuleHandleTGA =
3526 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3527 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3528 SDValue ModuleHandle =
3529 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3530
3531 // With the -maix-small-local-dynamic-tls option, produce a faster access
3532 // sequence for local-dynamic TLS variables where the offset from the
3533 // module-handle is encoded as an immediate operand.
3534 //
3535 // We only utilize the faster local-dynamic access sequence when the TLS
3536 // variable has a size within the policy limit. We treat types that are
3537 // not sized or are empty as being over the policy size limit.
3538 if (HasAIXSmallLocalDynamicTLS) {
3539 Type *GVType = GV->getValueType();
3540 if (GVType->isSized() && !GVType->isEmptyTy() &&
3541 GV->getDataLayout().getTypeAllocSize(GVType) <=
3543 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3544 ModuleHandle);
3545 }
3546
3547 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3548 }
3549
3550 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3551 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3552 // need to generate two TOC entries, one for the variable offset, one for the
3553 // region handle. The global address for the TOC entry of the region handle is
3554 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3555 // entry of the variable offset is created with MO_TLSGD_FLAG.
3556 SDValue VariableOffsetTGA =
3557 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3558 SDValue RegionHandleTGA =
3559 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3560 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3561 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3562 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3563 RegionHandle);
3564}
3565
3566SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3567 SelectionDAG &DAG) const {
3568 // FIXME: TLS addresses currently use medium model code sequences,
3569 // which is the most useful form. Eventually support for small and
3570 // large models could be added if users need it, at the cost of
3571 // additional complexity.
3572 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3573 if (DAG.getTarget().useEmulatedTLS())
3574 return LowerToTLSEmulatedModel(GA, DAG);
3575
3576 SDLoc dl(GA);
3577 const GlobalValue *GV = GA->getGlobal();
3578 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3579 bool is64bit = Subtarget.isPPC64();
3580 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3581 PICLevel::Level picLevel = M->getPICLevel();
3582
3583 const TargetMachine &TM = getTargetMachine();
3584 TLSModel::Model Model = TM.getTLSModel(GV);
3585
3586 if (Model == TLSModel::LocalExec) {
3587 if (Subtarget.isUsingPCRelativeCalls()) {
3588 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3589 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3590 PPCII::MO_TPREL_PCREL_FLAG);
3591 SDValue MatAddr =
3592 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3593 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3594 }
3595
3596 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3597 PPCII::MO_TPREL_HA);
3598 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3599 PPCII::MO_TPREL_LO);
3600 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3601 : DAG.getRegister(PPC::R2, MVT::i32);
3602
3603 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3604 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3605 }
3606
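// For reference (illustrative): on 64-bit ELF the initial-exec path below
// typically expands to
//   addis r3, r2, x@got@tprel@ha
//   ld    r3, x@got@tprel@l(r3)
//   add   r3, r3, x@tls
// i.e. load the TP offset from the GOT and add it to the thread pointer.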
3607 if (Model == TLSModel::InitialExec) {
3608 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3609 SDValue TGA = DAG.getTargetGlobalAddress(
3610 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3611 SDValue TGATLS = DAG.getTargetGlobalAddress(
3612 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3613 SDValue TPOffset;
3614 if (IsPCRel) {
3615 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3616 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3617 MachinePointerInfo());
3618 } else {
3619 SDValue GOTPtr;
3620 if (is64bit) {
3621 setUsesTOCBasePtr(DAG);
3622 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3623 GOTPtr =
3624 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3625 } else {
3626 if (!TM.isPositionIndependent())
3627 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3628 else if (picLevel == PICLevel::SmallPIC)
3629 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3630 else
3631 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3632 }
3633 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3634 }
3635 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3636 }
3637
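// For reference (illustrative): the 64-bit ELF general-dynamic path below
// typically expands to
//   addis r3, r2, x@got@tlsgd@ha
//   addi  r3, r3, x@got@tlsgd@l
//   bl    __tls_get_addr(x@tlsgd)
// with the ADDI_TLSGD_L_ADDR node carrying the addi plus the call.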
3638 if (Model == TLSModel::GeneralDynamic) {
3639 if (Subtarget.isUsingPCRelativeCalls()) {
3640 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3641 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3642 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3643 }
3644
3645 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3646 SDValue GOTPtr;
3647 if (is64bit) {
3648 setUsesTOCBasePtr(DAG);
3649 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3650 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3651 GOTReg, TGA);
3652 } else {
3653 if (picLevel == PICLevel::SmallPIC)
3654 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3655 else
3656 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3657 }
3658 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3659 GOTPtr, TGA, TGA);
3660 }
3661
3662 if (Model == TLSModel::LocalDynamic) {
3663 if (Subtarget.isUsingPCRelativeCalls()) {
3664 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3665 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3666 SDValue MatPCRel =
3667 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3668 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3669 }
3670
3671 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3672 SDValue GOTPtr;
3673 if (is64bit) {
3674 setUsesTOCBasePtr(DAG);
3675 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3676 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3677 GOTReg, TGA);
3678 } else {
3679 if (picLevel == PICLevel::SmallPIC)
3680 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3681 else
3682 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3683 }
3684 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3685 PtrVT, GOTPtr, TGA, TGA);
3686 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3687 PtrVT, TLSAddr, TGA);
3688 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3689 }
3690
3691 llvm_unreachable("Unknown TLS model!");
3692}
3693
3694SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3695 SelectionDAG &DAG) const {
3696 EVT PtrVT = Op.getValueType();
3697 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3698 SDLoc DL(GSDN);
3699 const GlobalValue *GV = GSDN->getGlobal();
3700
3701 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3702 // The actual address of the GlobalValue is stored in the TOC.
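 // A rough sketch of what the paths below turn into (illustrative only; the
 // actual instructions are chosen later during selection): the PC-relative
 // path materializes the address directly, e.g. with a prefixed paddi, or
 // loads it with a pld from the GOT for GOT-indirect accesses, while the
 // non-PC-relative path emits a load from the global's TOC entry via
 // getTOCEntry().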
3703 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3704 if (Subtarget.isUsingPCRelativeCalls()) {
3705 EVT Ty = getPointerTy(DAG.getDataLayout());
3706 if (isAccessedAsGotIndirect(Op)) {
3707 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3708 PPCII::MO_GOT_PCREL_FLAG);
3709 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3710 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3711 MachinePointerInfo());
3712 return Load;
3713 } else {
3714 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3715 PPCII::MO_PCREL_FLAG);
3716 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3717 }
3718 }
3719 setUsesTOCBasePtr(DAG);
3720 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3721 return getTOCEntry(DAG, DL, GA);
3722 }
3723
3724 unsigned MOHiFlag, MOLoFlag;
3725 bool IsPIC = isPositionIndependent();
3726 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3727
3728 if (IsPIC && Subtarget.isSVR4ABI()) {
3729 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3730 GSDN->getOffset(),
3731 PPCII::MO_PIC_FLAG);
3732 return getTOCEntry(DAG, DL, GA);
3733 }
3734
3735 SDValue GAHi =
3736 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3737 SDValue GALo =
3738 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3739
3740 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3741}
3742
3743SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3744 bool IsStrict = Op->isStrictFPOpcode();
3745 ISD::CondCode CC =
3746 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3747 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3748 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3749 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3750 EVT LHSVT = LHS.getValueType();
3751 SDLoc dl(Op);
3752
3753 // Soften the setcc with libcall if it is fp128.
3754 if (LHSVT == MVT::f128) {
3755 assert(!Subtarget.hasP9Vector() &&
3756 "SETCC for f128 is already legal under Power9!");
3757 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3758 Op->getOpcode() == ISD::STRICT_FSETCCS);
3759 if (RHS.getNode())
3760 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3761 DAG.getCondCode(CC));
3762 if (IsStrict)
3763 return DAG.getMergeValues({LHS, Chain}, dl);
3764 return LHS;
3765 }
3766
3767 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3768
3769 if (Op.getValueType() == MVT::v2i64) {
3770 // When the operands themselves are v2i64 values, we need to do something
3771 // special because VSX has no underlying comparison operations for these.
3772 if (LHS.getValueType() == MVT::v2i64) {
3773 // Equality can be handled by casting to the legal type for Altivec
3774 // comparisons, everything else needs to be expanded.
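 // Reasoning sketch (illustrative): a 64-bit lane compares equal iff both of
 // its 32-bit halves compare equal, so the code below performs the compare as
 // v4i32, swaps adjacent words with the {1, 0, 3, 2} shuffle, and ANDs the
 // swapped mask with the original, leaving each i64 lane all-ones only when
 // both halves matched.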
3775 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3776 return SDValue();
3777 SDValue SetCC32 = DAG.getSetCC(
3778 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3779 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3780 int ShuffV[] = {1, 0, 3, 2};
3781 SDValue Shuff =
3782 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3783 return DAG.getBitcast(MVT::v2i64,
3784 DAG.getNode(ISD::AND,
3785 dl, MVT::v4i32, Shuff, SetCC32));
3786 }
3787
3788 // We handle most of these in the usual way.
3789 return Op;
3790 }
3791
3792 // If we're comparing for equality to zero, expose the fact that this is
3793 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3794 // fold the new nodes.
3795 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3796 return V;
3797
3798 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3799 // Leave comparisons against 0 and -1 alone for now, since they're usually
3800 // optimized. FIXME: revisit this when we can custom lower all setcc
3801 // optimizations.
3802 if (C->isAllOnes() || C->isZero())
3803 return SDValue();
3804 }
3805
3806 // If we have an integer seteq/setne, turn it into a compare against zero
3807 // by xor'ing the rhs with the lhs, which is faster than setting a
3808 // condition register, reading it back out, and masking the correct bit. The
3809 // normal approach here uses sub to do this instead of xor. Using xor exposes
3810 // the result to other bit-twiddling opportunities.
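 // For example (illustrative only): (seteq %a, %b) becomes
 // (seteq (xor %a, %b), 0), and the comparison against zero can then reuse
 // the ctlz/srl lowering mentioned above instead of reading a CR field.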
3811 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3812 EVT VT = Op.getValueType();
3813 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3814 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3815 }
3816 return SDValue();
3817}
3818
3819SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3820 SDNode *Node = Op.getNode();
3821 EVT VT = Node->getValueType(0);
3822 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3823 SDValue InChain = Node->getOperand(0);
3824 SDValue VAListPtr = Node->getOperand(1);
3825 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3826 SDLoc dl(Node);
3827
3828 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3829
3830 // gpr_index
3831 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3832 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3833 InChain = GprIndex.getValue(1);
3834
3835 if (VT == MVT::i64) {
3836 // Check if GprIndex is odd; i64 arguments must start at an even index
3837 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3838 DAG.getConstant(1, dl, MVT::i32));
3839 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3840 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3841 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3842 DAG.getConstant(1, dl, MVT::i32));
3843 // Align GprIndex to be even if it isn't
3844 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3845 GprIndex);
3846 }
3847
3848 // fpr index is 1 byte after gpr
3849 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3850 DAG.getConstant(1, dl, MVT::i32));
3851
3852 // fpr
3853 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3854 FprPtr, MachinePointerInfo(SV), MVT::i8);
3855 InChain = FprIndex.getValue(1);
3856
3857 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3858 DAG.getConstant(8, dl, MVT::i32));
3859
3860 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3861 DAG.getConstant(4, dl, MVT::i32));
3862
3863 // areas
3864 SDValue OverflowArea =
3865 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3866 InChain = OverflowArea.getValue(1);
3867
3868 SDValue RegSaveArea =
3869 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3870 InChain = RegSaveArea.getValue(1);
3871
3872 // select overflow_area if index >= 8
3873 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3874 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3875
3876 // adjustment constant gpr_index * 4/8
3877 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3878 VT.isInteger() ? GprIndex : FprIndex,
3879 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3880 MVT::i32));
3881
3882 // OurReg = RegSaveArea + RegConstant
3883 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3884 RegConstant);
3885
3886 // Floating types are 32 bytes into RegSaveArea
3887 if (VT.isFloatingPoint())
3888 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3889 DAG.getConstant(32, dl, MVT::i32));
3890
3891 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3892 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3893 VT.isInteger() ? GprIndex : FprIndex,
3894 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3895 MVT::i32));
3896
3897 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3898 VT.isInteger() ? VAListPtr : FprPtr,
3899 MachinePointerInfo(SV), MVT::i8);
3900
3901 // determine if we should load from reg_save_area or overflow_area
3902 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3903
3904 // increase overflow_area by 4/8 if gpr/fpr >= 8
3905 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3906 DAG.getConstant(VT.isInteger() ? 4 : 8,
3907 dl, MVT::i32));
3908
3909 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3910 OverflowAreaPlusN);
3911
3912 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3913 MachinePointerInfo(), MVT::i32);
3914
3915 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3916}
3917
3918SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3919 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3920
3921 // We have to copy the entire va_list struct:
3922 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3923 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3924 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3925 false, true, /*CI=*/nullptr, std::nullopt,
3926 MachinePointerInfo(), MachinePointerInfo());
3927}
3928
3929SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3930 SelectionDAG &DAG) const {
3931 if (Subtarget.isAIXABI())
3932 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3933
3934 return Op.getOperand(0);
3935}
3936
3937SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3938 MachineFunction &MF = DAG.getMachineFunction();
3939 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3940
3941 assert((Op.getOpcode() == ISD::INLINEASM ||
3942 Op.getOpcode() == ISD::INLINEASM_BR) &&
3943 "Expecting Inline ASM node.");
3944
3945 // If an LR store is already known to be required then there is no point in
3946 // checking this ASM as well.
3947 if (MFI.isLRStoreRequired())
3948 return Op;
3949
3950 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3951 // type MVT::Glue. We want to ignore this last operand if that is the case.
3952 unsigned NumOps = Op.getNumOperands();
3953 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3954 --NumOps;
3955
3956 // Check all operands that may contain the LR.
3957 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3958 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3959 unsigned NumVals = Flags.getNumOperandRegisters();
3960 ++i; // Skip the ID value.
3961
3962 switch (Flags.getKind()) {
3963 default:
3964 llvm_unreachable("Bad flags!");
3965 case InlineAsm::Kind::RegUse:
3966 case InlineAsm::Kind::Imm:
3967 case InlineAsm::Kind::Mem:
3968 i += NumVals;
3969 break;
3970 case InlineAsm::Kind::Clobber:
3971 case InlineAsm::Kind::RegDef:
3972 case InlineAsm::Kind::RegDefEarlyClobber: {
3973 for (; NumVals; --NumVals, ++i) {
3974 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3975 if (Reg != PPC::LR && Reg != PPC::LR8)
3976 continue;
3977 MFI.setLRStoreRequired();
3978 return Op;
3979 }
3980 break;
3981 }
3982 }
3983 }
3984
3985 return Op;
3986}
3987
3988SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3989 SelectionDAG &DAG) const {
3990 if (Subtarget.isAIXABI())
3991 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3992
3993 SDValue Chain = Op.getOperand(0);
3994 SDValue Trmp = Op.getOperand(1); // trampoline
3995 SDValue FPtr = Op.getOperand(2); // nested function
3996 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3997 SDLoc dl(Op);
3998
3999 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4000 bool isPPC64 = (PtrVT == MVT::i64);
4001 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
4002
4003 TargetLowering::ArgListTy Args;
4004 TargetLowering::ArgListEntry Entry;
4005
4006 Entry.Ty = IntPtrTy;
4007 Entry.Node = Trmp; Args.push_back(Entry);
4008
4009 // TrampSize == (isPPC64 ? 48 : 40);
4010 Entry.Node =
4011 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT());
4012 Args.push_back(Entry);
4013
4014 Entry.Node = FPtr; Args.push_back(Entry);
4015 Entry.Node = Nest; Args.push_back(Entry);
4016
4017 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4018 TargetLowering::CallLoweringInfo CLI(DAG);
4019 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4020 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4021 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4022
4023 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4024 return CallResult.second;
4025}
4026
4027SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4028 MachineFunction &MF = DAG.getMachineFunction();
4029 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4030 EVT PtrVT = getPointerTy(MF.getDataLayout());
4031
4032 SDLoc dl(Op);
4033
4034 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4035 // vastart just stores the address of the VarArgsFrameIndex slot into the
4036 // memory location argument.
4037 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4038 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4039 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4040 MachinePointerInfo(SV));
4041 }
4042
4043 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4044 // We suppose the given va_list is already allocated.
4045 //
4046 // typedef struct {
4047 // char gpr; /* index into the array of 8 GPRs
4048 // * stored in the register save area
4049 // * gpr=0 corresponds to r3,
4050 // * gpr=1 to r4, etc.
4051 // */
4052 // char fpr; /* index into the array of 8 FPRs
4053 // * stored in the register save area
4054 // * fpr=0 corresponds to f1,
4055 // * fpr=1 to f2, etc.
4056 // */
4057 // char *overflow_arg_area;
4058 // /* location on stack that holds
4059 // * the next overflow argument
4060 // */
4061 // char *reg_save_area;
4062 // /* where r3:r10 and f1:f8 (if saved)
4063 // * are stored
4064 // */
4065 // } va_list[1];
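 // For illustration (assuming 4-byte pointers, matching the offsets used by
 // the stores below and by LowerVAARG/LowerVACOPY): the struct occupies
 // 12 bytes, laid out as
 //   offset 0: gpr                (1 byte)
 //   offset 1: fpr                (1 byte)
 //   offset 2: padding            (2 bytes)
 //   offset 4: overflow_arg_area  (4 bytes)
 //   offset 8: reg_save_area      (4 bytes)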
4066
4067 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4068 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4069 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4070 PtrVT);
4071 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4072 PtrVT);
4073
4074 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4075 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4076
4077 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4078 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4079
4080 uint64_t FPROffset = 1;
4081 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4082
4083 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4084
4085 // Store first byte : number of int regs
4086 SDValue firstStore =
4087 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4088 MachinePointerInfo(SV), MVT::i8);
4089 uint64_t nextOffset = FPROffset;
4090 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4091 ConstFPROffset);
4092
4093 // Store second byte : number of float regs
4094 SDValue secondStore =
4095 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4096 MachinePointerInfo(SV, nextOffset), MVT::i8);
4097 nextOffset += StackOffset;
4098 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4099
4100 // Store second word : arguments given on stack
4101 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4102 MachinePointerInfo(SV, nextOffset));
4103 nextOffset += FrameOffset;
4104 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4105
4106 // Store third word : arguments given in registers
4107 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4108 MachinePointerInfo(SV, nextOffset));
4109}
4110
4111/// FPR - The set of FP registers that should be allocated for arguments
4112/// on Darwin and AIX.
4113static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4114 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4115 PPC::F11, PPC::F12, PPC::F13};
4116
4117/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4118/// the stack.
4119static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4120 unsigned PtrByteSize) {
4121 unsigned ArgSize = ArgVT.getStoreSize();
4122 if (Flags.isByVal())
4123 ArgSize = Flags.getByValSize();
4124
4125 // Round up to multiples of the pointer size, except for array members,
4126 // which are always packed.
4127 if (!Flags.isInConsecutiveRegs())
4128 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4129
4130 return ArgSize;
4131}
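// For example (illustrative only): with PtrByteSize == 8, an i32 argument
// reserves 8 bytes, a 20-byte byval aggregate reserves 24 bytes, and a
// 20-byte array member passed in consecutive registers keeps its exact
// 20-byte size.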
4132
4133/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4134/// on the stack.
4135 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4136 ISD::ArgFlagsTy Flags,
4137 unsigned PtrByteSize) {
4138 Align Alignment(PtrByteSize);
4139
4140 // Altivec parameters are padded to a 16 byte boundary.
4141 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4142 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4143 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4144 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4145 Alignment = Align(16);
4146
4147 // ByVal parameters are aligned as requested.
4148 if (Flags.isByVal()) {
4149 auto BVAlign = Flags.getNonZeroByValAlign();
4150 if (BVAlign > PtrByteSize) {
4151 if (BVAlign.value() % PtrByteSize != 0)
4152 report_fatal_error(
4153 "ByVal alignment is not a multiple of the pointer size");
4154
4155 Alignment = BVAlign;
4156 }
4157 }
4158
4159 // Array members are always packed to their original alignment.
4160 if (Flags.isInConsecutiveRegs()) {
4161 // If the array member was split into multiple registers, the first
4162 // needs to be aligned to the size of the full type. (Except for
4163 // ppcf128, which is only aligned as its f64 components.)
4164 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4165 Alignment = Align(OrigVT.getStoreSize());
4166 else
4167 Alignment = Align(ArgVT.getStoreSize());
4168 }
4169
4170 return Alignment;
4171}
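// For example (illustrative only): an f64 argument keeps the pointer-size
// alignment, any of the vector types listed above is padded to a 16-byte
// boundary, and a byval argument requesting align(32) is placed on a 32-byte
// boundary (32 being a multiple of the pointer size).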
4172
4173/// CalculateStackSlotUsed - Return whether this argument will use its
4174/// stack slot (instead of being passed in registers). ArgOffset,
4175/// AvailableFPRs, and AvailableVRs must hold the current argument
4176/// position, and will be updated to account for this argument.
4177static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4178 unsigned PtrByteSize, unsigned LinkageSize,
4179 unsigned ParamAreaSize, unsigned &ArgOffset,
4180 unsigned &AvailableFPRs,
4181 unsigned &AvailableVRs) {
4182 bool UseMemory = false;
4183
4184 // Respect alignment of argument on the stack.
4185 Align Alignment =
4186 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4187 ArgOffset = alignTo(ArgOffset, Alignment);
4188 // If there's no space left in the argument save area, we must
4189 // use memory (this check also catches zero-sized arguments).
4190 if (ArgOffset >= LinkageSize + ParamAreaSize)
4191 UseMemory = true;
4192
4193 // Allocate argument on the stack.
4194 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4195 if (Flags.isInConsecutiveRegsLast())
4196 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4197 // If we overran the argument save area, we must use memory
4198 // (this check catches arguments passed partially in memory)
4199 if (ArgOffset > LinkageSize + ParamAreaSize)
4200 UseMemory = true;
4201
4202 // However, if the argument is actually passed in an FPR or a VR,
4203 // we don't use memory after all.
4204 if (!Flags.isByVal()) {
4205 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4206 if (AvailableFPRs > 0) {
4207 --AvailableFPRs;
4208 return false;
4209 }
4210 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4211 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4212 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4213 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4214 if (AvailableVRs > 0) {
4215 --AvailableVRs;
4216 return false;
4217 }
4218 }
4219
4220 return UseMemory;
4221}
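// A minimal usage sketch (hypothetical values, mirroring the pre-scan loops
// below that decide whether a parameter save area is needed):
//   unsigned ArgOffset = LinkageSize, FPRs = 13, VRs = 12;
//   bool OnStack = CalculateStackSlotUsed(MVT::f64, MVT::f64, Flags,
//                                         /*PtrByteSize=*/8, LinkageSize,
//                                         /*ParamAreaSize=*/64, ArgOffset,
//                                         FPRs, VRs);
// Each call advances ArgOffset and decrements the register counts, so the
// same state can be threaded through an entire argument list.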
4222
4223/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4224/// ensure minimum alignment required for target.
4225 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4226 unsigned NumBytes) {
4227 return alignTo(NumBytes, Lowering->getStackAlign());
4228}
4229
4230SDValue PPCTargetLowering::LowerFormalArguments(
4231 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4232 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4233 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4234 if (Subtarget.isAIXABI())
4235 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4236 InVals);
4237 if (Subtarget.is64BitELFABI())
4238 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4239 InVals);
4240 assert(Subtarget.is32BitELFABI());
4241 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4242 InVals);
4243}
4244
4245SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4246 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4247 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4248 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4249
4250 // 32-bit SVR4 ABI Stack Frame Layout:
4251 // +-----------------------------------+
4252 // +--> | Back chain |
4253 // | +-----------------------------------+
4254 // | | Floating-point register save area |
4255 // | +-----------------------------------+
4256 // | | General register save area |
4257 // | +-----------------------------------+
4258 // | | CR save word |
4259 // | +-----------------------------------+
4260 // | | VRSAVE save word |
4261 // | +-----------------------------------+
4262 // | | Alignment padding |
4263 // | +-----------------------------------+
4264 // | | Vector register save area |
4265 // | +-----------------------------------+
4266 // | | Local variable space |
4267 // | +-----------------------------------+
4268 // | | Parameter list area |
4269 // | +-----------------------------------+
4270 // | | LR save word |
4271 // | +-----------------------------------+
4272 // SP--> +--- | Back chain |
4273 // +-----------------------------------+
4274 //
4275 // Specifications:
4276 // System V Application Binary Interface PowerPC Processor Supplement
4277 // AltiVec Technology Programming Interface Manual
4278
4280 MachineFrameInfo &MFI = MF.getFrameInfo();
4281 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4282
4283 EVT PtrVT = getPointerTy(MF.getDataLayout());
4284 // Potential tail calls could cause overwriting of argument stack slots.
4285 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4286 (CallConv == CallingConv::Fast));
4287 const Align PtrAlign(4);
4288
4289 // Assign locations to all of the incoming arguments.
4290 SmallVector<CCValAssign, 16> ArgLocs;
4291 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4292 *DAG.getContext());
4293
4294 // Reserve space for the linkage area on the stack.
4295 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4296 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4297 if (useSoftFloat())
4298 CCInfo.PreAnalyzeFormalArguments(Ins);
4299
4300 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4301 CCInfo.clearWasPPCF128();
4302
4303 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4304 CCValAssign &VA = ArgLocs[i];
4305
4306 // Arguments stored in registers.
4307 if (VA.isRegLoc()) {
4308 const TargetRegisterClass *RC;
4309 EVT ValVT = VA.getValVT();
4310
4311 switch (ValVT.getSimpleVT().SimpleTy) {
4312 default:
4313 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4314 case MVT::i1:
4315 case MVT::i32:
4316 RC = &PPC::GPRCRegClass;
4317 break;
4318 case MVT::f32:
4319 if (Subtarget.hasP8Vector())
4320 RC = &PPC::VSSRCRegClass;
4321 else if (Subtarget.hasSPE())
4322 RC = &PPC::GPRCRegClass;
4323 else
4324 RC = &PPC::F4RCRegClass;
4325 break;
4326 case MVT::f64:
4327 if (Subtarget.hasVSX())
4328 RC = &PPC::VSFRCRegClass;
4329 else if (Subtarget.hasSPE())
4330 // SPE passes doubles in GPR pairs.
4331 RC = &PPC::GPRCRegClass;
4332 else
4333 RC = &PPC::F8RCRegClass;
4334 break;
4335 case MVT::v16i8:
4336 case MVT::v8i16:
4337 case MVT::v4i32:
4338 RC = &PPC::VRRCRegClass;
4339 break;
4340 case MVT::v4f32:
4341 RC = &PPC::VRRCRegClass;
4342 break;
4343 case MVT::v2f64:
4344 case MVT::v2i64:
4345 RC = &PPC::VRRCRegClass;
4346 break;
4347 }
4348
4349 SDValue ArgValue;
4350 // Transform the arguments stored in physical registers into
4351 // virtual ones.
4352 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4353 assert(i + 1 < e && "No second half of double precision argument");
4354 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4355 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4356 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4357 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4358 if (!Subtarget.isLittleEndian())
4359 std::swap (ArgValueLo, ArgValueHi);
4360 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4361 ArgValueHi);
4362 } else {
4363 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4364 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4365 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4366 if (ValVT == MVT::i1)
4367 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4368 }
4369
4370 InVals.push_back(ArgValue);
4371 } else {
4372 // Argument stored in memory.
4373 assert(VA.isMemLoc());
4374
4375 // Get the extended size of the argument type in stack
4376 unsigned ArgSize = VA.getLocVT().getStoreSize();
4377 // Get the actual size of the argument type
4378 unsigned ObjSize = VA.getValVT().getStoreSize();
4379 unsigned ArgOffset = VA.getLocMemOffset();
4380 // Stack objects in PPC32 are right justified.
4381 ArgOffset += ArgSize - ObjSize;
4382 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4383
4384 // Create load nodes to retrieve arguments from the stack.
4385 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4386 InVals.push_back(
4387 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4388 }
4389 }
4390
4391 // Assign locations to all of the incoming aggregate by value arguments.
4392 // Aggregates passed by value are stored in the local variable space of the
4393 // caller's stack frame, right above the parameter list area.
4394 SmallVector<CCValAssign, 16> ByValArgLocs;
4395 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4396 ByValArgLocs, *DAG.getContext());
4397
4398 // Reserve stack space for the allocations in CCInfo.
4399 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4400
4401 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4402
4403 // Area that is at least reserved in the caller of this function.
4404 unsigned MinReservedArea = CCByValInfo.getStackSize();
4405 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4406
4407 // Set the size that is at least reserved in the caller of this function. Tail
4408 // call optimized function's reserved stack space needs to be aligned so that
4409 // taking the difference between two stack areas will result in an aligned
4410 // stack.
4411 MinReservedArea =
4412 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4413 FuncInfo->setMinReservedArea(MinReservedArea);
4414
4415 SmallVector<SDValue, 8> MemOps;
4416
4417 // If the function takes a variable number of arguments, make a frame index for
4418 // the start of the first vararg value... for expansion of llvm.va_start.
4419 if (isVarArg) {
4420 static const MCPhysReg GPArgRegs[] = {
4421 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4422 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4423 };
4424 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4425
4426 static const MCPhysReg FPArgRegs[] = {
4427 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4428 PPC::F8
4429 };
4430 unsigned NumFPArgRegs = std::size(FPArgRegs);
4431
4432 if (useSoftFloat() || hasSPE())
4433 NumFPArgRegs = 0;
4434
4435 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4436 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4437
4438 // Make room for NumGPArgRegs and NumFPArgRegs.
4439 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4440 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4441
4442 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4443 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4444
4445 FuncInfo->setVarArgsFrameIndex(
4446 MFI.CreateStackObject(Depth, Align(8), false));
4447 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4448
4449 // The fixed integer arguments of a variadic function are stored to the
4450 // VarArgsFrameIndex on the stack so that they may be loaded by
4451 // dereferencing the result of va_next.
4452 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4453 // Get an existing live-in vreg, or add a new one.
4454 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4455 if (!VReg)
4456 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4457
4458 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4459 SDValue Store =
4460 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4461 MemOps.push_back(Store);
4462 // Increment the address by four for the next argument to store
4463 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4464 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4465 }
4466
4467 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4468 // is set.
4469 // The double arguments are stored to the VarArgsFrameIndex
4470 // on the stack.
4471 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4472 // Get an existing live-in vreg, or add a new one.
4473 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4474 if (!VReg)
4475 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4476
4477 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4478 SDValue Store =
4479 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4480 MemOps.push_back(Store);
4481 // Increment the address by eight for the next argument to store
4482 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4483 PtrVT);
4484 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4485 }
4486 }
4487
4488 if (!MemOps.empty())
4489 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4490
4491 return Chain;
4492}
4493
4494// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4495// value to MVT::i64 and then truncate to the correct register size.
4496SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4497 EVT ObjectVT, SelectionDAG &DAG,
4498 SDValue ArgVal,
4499 const SDLoc &dl) const {
4500 if (Flags.isSExt())
4501 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4502 DAG.getValueType(ObjectVT));
4503 else if (Flags.isZExt())
4504 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4505 DAG.getValueType(ObjectVT));
4506
4507 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4508}
4509
4510SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4511 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4512 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4513 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4514 // TODO: add description of PPC stack frame format, or at least some docs.
4515 //
4516 bool isELFv2ABI = Subtarget.isELFv2ABI();
4517 bool isLittleEndian = Subtarget.isLittleEndian();
4518 MachineFunction &MF = DAG.getMachineFunction();
4519 MachineFrameInfo &MFI = MF.getFrameInfo();
4520 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4521
4522 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4523 "fastcc not supported on varargs functions");
4524
4525 EVT PtrVT = getPointerTy(MF.getDataLayout());
4526 // Potential tail calls could cause overwriting of argument stack slots.
4527 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4528 (CallConv == CallingConv::Fast));
4529 unsigned PtrByteSize = 8;
4530 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4531
4532 static const MCPhysReg GPR[] = {
4533 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4534 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4535 };
4536 static const MCPhysReg VR[] = {
4537 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4538 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4539 };
4540
4541 const unsigned Num_GPR_Regs = std::size(GPR);
4542 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4543 const unsigned Num_VR_Regs = std::size(VR);
4544
4545 // Do a first pass over the arguments to determine whether the ABI
4546 // guarantees that our caller has allocated the parameter save area
4547 // on its stack frame. In the ELFv1 ABI, this is always the case;
4548 // in the ELFv2 ABI, it is true if this is a vararg function or if
4549 // any parameter is located in a stack slot.
4550
4551 bool HasParameterArea = !isELFv2ABI || isVarArg;
4552 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4553 unsigned NumBytes = LinkageSize;
4554 unsigned AvailableFPRs = Num_FPR_Regs;
4555 unsigned AvailableVRs = Num_VR_Regs;
4556 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4557 if (Ins[i].Flags.isNest())
4558 continue;
4559
4560 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4561 PtrByteSize, LinkageSize, ParamAreaSize,
4562 NumBytes, AvailableFPRs, AvailableVRs))
4563 HasParameterArea = true;
4564 }
4565
4566 // Add DAG nodes to load the arguments or copy them out of registers. On
4567 // entry to a function on PPC, the arguments start after the linkage area,
4568 // although the first ones are often in registers.
4569
4570 unsigned ArgOffset = LinkageSize;
4571 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4572 SmallVector<SDValue, 8> MemOps;
4573 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4574 unsigned CurArgIdx = 0;
4575 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4576 SDValue ArgVal;
4577 bool needsLoad = false;
4578 EVT ObjectVT = Ins[ArgNo].VT;
4579 EVT OrigVT = Ins[ArgNo].ArgVT;
4580 unsigned ObjSize = ObjectVT.getStoreSize();
4581 unsigned ArgSize = ObjSize;
4582 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4583 if (Ins[ArgNo].isOrigArg()) {
4584 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4585 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4586 }
4587 // We re-align the argument offset for each argument, except under the fast
4588 // calling convention, where we only do so when the argument will actually
4589 // use a stack slot.
4590 unsigned CurArgOffset;
4591 Align Alignment;
4592 auto ComputeArgOffset = [&]() {
4593 /* Respect alignment of argument on the stack. */
4594 Alignment =
4595 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4596 ArgOffset = alignTo(ArgOffset, Alignment);
4597 CurArgOffset = ArgOffset;
4598 };
4599
4600 if (CallConv != CallingConv::Fast) {
4601 ComputeArgOffset();
4602
4603 /* Compute GPR index associated with argument offset. */
4604 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4605 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4606 }
4607
4608 // FIXME the codegen can be much improved in some cases.
4609 // We do not have to keep everything in memory.
4610 if (Flags.isByVal()) {
4611 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4612
4613 if (CallConv == CallingConv::Fast)
4614 ComputeArgOffset();
4615
4616 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4617 ObjSize = Flags.getByValSize();
4618 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4619 // Empty aggregate parameters do not take up registers. Examples:
4620 // struct { } a;
4621 // union { } b;
4622 // int c[0];
4623 // etc. However, we have to provide a place-holder in InVals, so
4624 // pretend we have an 8-byte item at the current address for that
4625 // purpose.
4626 if (!ObjSize) {
4627 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4628 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4629 InVals.push_back(FIN);
4630 continue;
4631 }
4632
4633 // Create a stack object covering all stack doublewords occupied
4634 // by the argument. If the argument is (fully or partially) on
4635 // the stack, or if the argument is fully in registers but the
4636 // caller has allocated the parameter save area anyway, we can refer
4637 // directly to the caller's stack frame. Otherwise, create a
4638 // local copy in our own frame.
4639 int FI;
4640 if (HasParameterArea ||
4641 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4642 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4643 else
4644 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4645 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4646
4647 // Handle aggregates smaller than 8 bytes.
4648 if (ObjSize < PtrByteSize) {
4649 // The value of the object is its address, which differs from the
4650 // address of the enclosing doubleword on big-endian systems.
4651 SDValue Arg = FIN;
4652 if (!isLittleEndian) {
4653 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4654 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4655 }
4656 InVals.push_back(Arg);
4657
4658 if (GPR_idx != Num_GPR_Regs) {
4659 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4660 FuncInfo->addLiveInAttr(VReg, Flags);
4661 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4662 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4663 SDValue Store =
4664 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4665 MachinePointerInfo(&*FuncArg), ObjType);
4666 MemOps.push_back(Store);
4667 }
4668 // Whether we copied from a register or not, advance the offset
4669 // into the parameter save area by a full doubleword.
4670 ArgOffset += PtrByteSize;
4671 continue;
4672 }
4673
4674 // The value of the object is its address, which is the address of
4675 // its first stack doubleword.
4676 InVals.push_back(FIN);
4677
4678 // Store whatever pieces of the object are in registers to memory.
4679 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4680 if (GPR_idx == Num_GPR_Regs)
4681 break;
4682
4683 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4684 FuncInfo->addLiveInAttr(VReg, Flags);
4685 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4686 SDValue Addr = FIN;
4687 if (j) {
4688 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4689 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4690 }
4691 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4692 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4693 SDValue Store =
4694 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4695 MachinePointerInfo(&*FuncArg, j), ObjType);
4696 MemOps.push_back(Store);
4697 ++GPR_idx;
4698 }
4699 ArgOffset += ArgSize;
4700 continue;
4701 }
4702
4703 switch (ObjectVT.getSimpleVT().SimpleTy) {
4704 default: llvm_unreachable("Unhandled argument type!");
4705 case MVT::i1:
4706 case MVT::i32:
4707 case MVT::i64:
4708 if (Flags.isNest()) {
4709 // The 'nest' parameter, if any, is passed in R11.
4710 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4711 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4712
4713 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4714 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4715
4716 break;
4717 }
4718
4719 // These can be scalar arguments or elements of an integer array type
4720 // passed directly. Clang may use those instead of "byval" aggregate
4721 // types to avoid forcing arguments to memory unnecessarily.
4722 if (GPR_idx != Num_GPR_Regs) {
4723 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4724 FuncInfo->addLiveInAttr(VReg, Flags);
4725 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4726
4727 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4728 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4729 // value to MVT::i64 and then truncate to the correct register size.
4730 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4731 } else {
4732 if (CallConv == CallingConv::Fast)
4733 ComputeArgOffset();
4734
4735 needsLoad = true;
4736 ArgSize = PtrByteSize;
4737 }
4738 if (CallConv != CallingConv::Fast || needsLoad)
4739 ArgOffset += 8;
4740 break;
4741
4742 case MVT::f32:
4743 case MVT::f64:
4744 // These can be scalar arguments or elements of a float array type
4745 // passed directly. The latter are used to implement ELFv2 homogenous
4746 // float aggregates.
4747 if (FPR_idx != Num_FPR_Regs) {
4748 unsigned VReg;
4749
4750 if (ObjectVT == MVT::f32)
4751 VReg = MF.addLiveIn(FPR[FPR_idx],
4752 Subtarget.hasP8Vector()
4753 ? &PPC::VSSRCRegClass
4754 : &PPC::F4RCRegClass);
4755 else
4756 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4757 ? &PPC::VSFRCRegClass
4758 : &PPC::F8RCRegClass);
4759
4760 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4761 ++FPR_idx;
4762 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4763 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4764 // once we support fp <-> gpr moves.
4765
4766 // This can only ever happen in the presence of f32 array types,
4767 // since otherwise we never run out of FPRs before running out
4768 // of GPRs.
4769 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4770 FuncInfo->addLiveInAttr(VReg, Flags);
4771 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4772
4773 if (ObjectVT == MVT::f32) {
4774 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4775 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4776 DAG.getConstant(32, dl, MVT::i32));
4777 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4778 }
4779
4780 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4781 } else {
4782 if (CallConv == CallingConv::Fast)
4783 ComputeArgOffset();
4784
4785 needsLoad = true;
4786 }
4787
4788 // When passing an array of floats, the array occupies consecutive
4789 // space in the argument area; only round up to the next doubleword
4790 // at the end of the array. Otherwise, each float takes 8 bytes.
4791 if (CallConv != CallingConv::Fast || needsLoad) {
4792 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4793 ArgOffset += ArgSize;
4794 if (Flags.isInConsecutiveRegsLast())
4795 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4796 }
4797 break;
4798 case MVT::v4f32:
4799 case MVT::v4i32:
4800 case MVT::v8i16:
4801 case MVT::v16i8:
4802 case MVT::v2f64:
4803 case MVT::v2i64:
4804 case MVT::v1i128:
4805 case MVT::f128:
4806 // These can be scalar arguments or elements of a vector array type
4807 // passed directly. The latter are used to implement ELFv2 homogenous
4808 // vector aggregates.
4809 if (VR_idx != Num_VR_Regs) {
4810 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4811 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4812 ++VR_idx;
4813 } else {
4814 if (CallConv == CallingConv::Fast)
4815 ComputeArgOffset();
4816 needsLoad = true;
4817 }
4818 if (CallConv != CallingConv::Fast || needsLoad)
4819 ArgOffset += 16;
4820 break;
4821 }
4822
4823 // We need to load the argument to a virtual register if we determined
4824 // above that we ran out of physical registers of the appropriate type.
4825 if (needsLoad) {
4826 if (ObjSize < ArgSize && !isLittleEndian)
4827 CurArgOffset += ArgSize - ObjSize;
4828 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4829 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4830 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4831 }
4832
4833 InVals.push_back(ArgVal);
4834 }
4835
4836 // Area that is at least reserved in the caller of this function.
4837 unsigned MinReservedArea;
4838 if (HasParameterArea)
4839 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4840 else
4841 MinReservedArea = LinkageSize;
4842
4843 // Set the size that is at least reserved in the caller of this function. Tail
4844 // call optimized functions' reserved stack space needs to be aligned so that
4845 // taking the difference between two stack areas will result in an aligned
4846 // stack.
4847 MinReservedArea =
4848 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4849 FuncInfo->setMinReservedArea(MinReservedArea);
4850
4851 // If the function takes a variable number of arguments, make a frame index for
4852 // the start of the first vararg value... for expansion of llvm.va_start.
4853 // The ELFv2 ABI spec states:
4854 // C programs that are intended to be *portable* across different compilers
4855 // and architectures must use the header file <stdarg.h> to deal with variable
4856 // argument lists.
4857 if (isVarArg && MFI.hasVAStart()) {
4858 int Depth = ArgOffset;
4859
4860 FuncInfo->setVarArgsFrameIndex(
4861 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4862 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4863
4864 // If this function is vararg, store any remaining integer argument regs
4865 // to their spots on the stack so that they may be loaded by dereferencing
4866 // the result of va_next.
4867 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4868 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4869 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4870 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4871 SDValue Store =
4872 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4873 MemOps.push_back(Store);
4874 // Increment the address by PtrByteSize (eight) for the next argument to store
4875 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4876 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4877 }
4878 }
4879
4880 if (!MemOps.empty())
4881 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4882
4883 return Chain;
4884}
4885
4886/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4887/// adjusted to accommodate the arguments for the tailcall.
4888static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4889 unsigned ParamSize) {
4890
4891 if (!isTailCall) return 0;
4892
4893 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4894 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4895 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4896 // Remember only if the new adjustment is bigger.
4897 if (SPDiff < FI->getTailCallSPDelta())
4898 FI->setTailCallSPDelta(SPDiff);
4899
4900 return SPDiff;
4901}
4902
4903static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4904
4905static bool callsShareTOCBase(const Function *Caller,
4906 const GlobalValue *CalleeGV,
4907 const TargetMachine &TM) {
4908 // It does not make sense to call callsShareTOCBase() with a caller that
4909 // is PC Relative since PC Relative callers do not have a TOC.
4910#ifndef NDEBUG
4911 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4912 assert(!STICaller->isUsingPCRelativeCalls() &&
4913 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4914#endif
4915
4916 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4917 // don't have enough information to determine if the caller and callee share
4918 // the same TOC base, so we have to pessimistically assume they don't for
4919 // correctness.
4920 if (!CalleeGV)
4921 return false;
4922
4923 // If the callee is preemptable, then the static linker will use a plt-stub
4924 // which saves the toc to the stack, and needs a nop after the call
4925 // instruction to convert to a toc-restore.
4926 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4927 return false;
4928
4929 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4930 // We may need a TOC restore in the situation where the caller requires a
4931 // valid TOC but the callee is PC Relative and does not.
4932 const Function *F = dyn_cast<Function>(CalleeGV);
4933 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4934
4935 // If we have an Alias we can try to get the function from there.
4936 if (Alias) {
4937 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4938 F = dyn_cast<Function>(GlobalObj);
4939 }
4940
4941 // If we still have no valid function pointer we do not have enough
4942 // information to determine if the callee uses PC Relative calls so we must
4943 // assume that it does.
4944 if (!F)
4945 return false;
4946
4947 // If the callee uses PC Relative we cannot guarantee that the callee won't
4948 // clobber the TOC of the caller and so we must assume that the two
4949 // functions do not share a TOC base.
4950 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4951 if (STICallee->isUsingPCRelativeCalls())
4952 return false;
4953
4954 // If the GV is not a strong definition then we need to assume it can be
4955 // replaced by another function at link time. The function that replaces
4956 // it may not share the same TOC as the caller since the callee may be
4957 // replaced by a PC Relative version of the same function.
4958 if (!CalleeGV->isStrongDefinitionForLinker())
4959 return false;
4960
4961 // The medium and large code models are expected to provide a sufficiently
4962 // large TOC to provide all data addressing needs of a module with a
4963 // single TOC.
4964 if (CodeModel::Medium == TM.getCodeModel() ||
4965 CodeModel::Large == TM.getCodeModel())
4966 return true;
4967
4968 // Any explicitly-specified sections and section prefixes must also match.
4969 // Also, if we're using -ffunction-sections, then each function is always in
4970 // a different section (the same is true for COMDAT functions).
4971 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4972 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4973 return false;
4974 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4975 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4976 return false;
4977 }
4978
4979 return true;
4980}
4981
4982static bool
4983 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4984 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4985 assert(Subtarget.is64BitELFABI());
4986
4987 const unsigned PtrByteSize = 8;
4988 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4989
4990 static const MCPhysReg GPR[] = {
4991 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4992 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4993 };
4994 static const MCPhysReg VR[] = {
4995 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4996 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4997 };
4998
4999 const unsigned NumGPRs = std::size(GPR);
5000 const unsigned NumFPRs = 13;
5001 const unsigned NumVRs = std::size(VR);
5002 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5003
5004 unsigned NumBytes = LinkageSize;
5005 unsigned AvailableFPRs = NumFPRs;
5006 unsigned AvailableVRs = NumVRs;
5007
5008 for (const ISD::OutputArg& Param : Outs) {
5009 if (Param.Flags.isNest()) continue;
5010
5011 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5012 LinkageSize, ParamAreaSize, NumBytes,
5013 AvailableFPRs, AvailableVRs))
5014 return true;
5015 }
5016 return false;
5017}
5018
5019static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5020 if (CB.arg_size() != CallerFn->arg_size())
5021 return false;
5022
5023 auto CalleeArgIter = CB.arg_begin();
5024 auto CalleeArgEnd = CB.arg_end();
5025 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5026
5027 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5028 const Value* CalleeArg = *CalleeArgIter;
5029 const Value* CallerArg = &(*CallerArgIter);
5030 if (CalleeArg == CallerArg)
5031 continue;
5032
5033 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5034 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5035 // }
5036 // 1st argument of callee is undef and has the same type as caller.
5037 if (CalleeArg->getType() == CallerArg->getType() &&
5038 isa<UndefValue>(CalleeArg))
5039 continue;
5040
5041 return false;
5042 }
5043
5044 return true;
5045}
5046
5047// Returns true if TCO is possible between the callers and callees
5048// calling conventions.
5049static bool
5050 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5051 CallingConv::ID CalleeCC) {
5052 // Tail calls are possible with fastcc and ccc.
5053 auto isTailCallableCC = [] (CallingConv::ID CC){
5054 return CC == CallingConv::C || CC == CallingConv::Fast;
5055 };
5056 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5057 return false;
5058
5059 // We can safely tail call both fastcc and ccc callees from a c calling
5060 // convention caller. If the caller is fastcc, we may have less stack space
5061 // than a non-fastcc caller with the same signature so disable tail-calls in
5062 // that case.
5063 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5064}
5065
5066bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5067 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5068 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5069 const SmallVectorImpl<ISD::OutputArg> &Outs,
5070 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5071 bool isCalleeExternalSymbol) const {
5072 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5073
5074 if (DisableSCO && !TailCallOpt) return false;
5075
5076 // Variadic argument functions are not supported.
5077 if (isVarArg) return false;
5078
5079 // Check that the calling conventions are compatible for tco.
5080 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5081 return false;
5082
5083 // A caller containing any byval parameter is not supported.
5084 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5085 return false;
5086
5087 // A callee containing any byval parameter is not supported either.
5088 // Note: This is a quick work around, because in some cases, e.g.
5089 // caller's stack size > callee's stack size, we are still able to apply
5090 // sibling call optimization. For example, gcc is able to do SCO for caller1
5091 // in the following example, but not for caller2.
5092 // struct test {
5093 // long int a;
5094 // char ary[56];
5095 // } gTest;
5096 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5097 // b->a = v.a;
5098 // return 0;
5099 // }
5100 // void caller1(struct test a, struct test c, struct test *b) {
5101 // callee(gTest, b); }
5102 // void caller2(struct test *b) { callee(gTest, b); }
5103 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5104 return false;
5105
5106 // If callee and caller use different calling conventions, we cannot pass
5107 // parameters on stack since offsets for the parameter area may be different.
5108 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5109 return false;
5110
5111 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5112 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5113 // callee potentially have different TOC bases then we cannot tail call since
5114 // we need to restore the TOC pointer after the call.
5115 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5116 // We cannot guarantee this for indirect calls or calls to external functions.
5117 // When PC-Relative addressing is used, the concept of the TOC is no longer
5118 // applicable so this check is not required.
5119 // Check first for indirect calls.
5120 if (!Subtarget.isUsingPCRelativeCalls() &&
5121 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5122 return false;
5123
5124 // Check if we share the TOC base.
5125 if (!Subtarget.isUsingPCRelativeCalls() &&
5126 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5127 return false;
5128
5129 // TCO allows altering callee ABI, so we don't have to check further.
5130 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5131 return true;
5132
5133 if (DisableSCO) return false;
5134
5135 // If the callee uses the same argument list as the caller, then we can
5136 // apply SCO in this case. If not, then we need to check whether the callee
5137 // needs stack space for passing arguments.
5138 // PC Relative tail calls may not have a CallBase.
5139 // If there is no CallBase we cannot verify if we have the same argument
5140 // list so assume that we don't have the same argument list.
5141 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5142 needStackSlotPassParameters(Subtarget, Outs))
5143 return false;
5144 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5145 return false;
5146
5147 return true;
5148}
5149
5150/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5151/// for tail call optimization. Targets which want to do tail call
5152/// optimization should implement this function.
5153bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5154 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5155 CallingConv::ID CallerCC, bool isVarArg,
5156 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5157 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5158 return false;
5159
5160 // Variable argument functions are not supported.
5161 if (isVarArg)
5162 return false;
5163
5164 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5165 // Functions containing by val parameters are not supported.
5166 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5167 return false;
5168
5169 // Non-PIC/GOT tail calls are supported.
5170 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5171 return true;
5172
5173 // At the moment we can only do local tail calls (in same module, hidden
5174 // or protected) if we are generating PIC.
5175 if (CalleeGV)
5176 return CalleeGV->hasHiddenVisibility() ||
5177 CalleeGV->hasProtectedVisibility();
5178 }
5179
5180 return false;
5181}
5182
5183/// isBLACompatibleAddress - Return the immediate to use if the specified
5184/// 32-bit value is representable in the immediate field of a BxA instruction.
5185static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5186 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5187 if (!C) return nullptr;
5188
5189 int Addr = C->getZExtValue();
5190 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5191 SignExtend32<26>(Addr) != Addr)
5192 return nullptr; // Top 6 bits have to be sext of immediate.
5193
5194 return DAG
5195 .getConstant(
5196 (int)C->getZExtValue() >> 2, SDLoc(Op),
5197 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5198 .getNode();
5199}
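// Illustrative sketch (not part of the source): the same encodability test
// written as a standalone predicate. The helper name fitsBxAImmediate is
// hypothetical; it only mirrors the two constraints checked above, i.e. the
// target must be word aligned and must equal its own 26-bit sign extension.
//
//   #include <cstdint>
//   static bool fitsBxAImmediate(int32_t Addr) {
//     if (Addr & 3)
//       return false;                              // low two bits must be zero
//     int64_t Low26 = int64_t(Addr) & 0x03FFFFFF;  // keep the 26-bit LI field
//     int32_t SExt26 = int32_t((Low26 ^ 0x02000000) - 0x02000000);
//     return SExt26 == Addr;                       // upper bits are the sign extension
//   }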
5200
5201namespace {
5202
5203struct TailCallArgumentInfo {
5204 SDValue Arg;
5205 SDValue FrameIdxOp;
5206 int FrameIdx = 0;
5207
5208 TailCallArgumentInfo() = default;
5209};
5210
5211} // end anonymous namespace
5212
5213/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5214static void StoreTailCallArgumentsToStackSlot(
5215 SelectionDAG &DAG, SDValue Chain,
5216 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5217 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5218 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5219 SDValue Arg = TailCallArgs[i].Arg;
5220 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5221 int FI = TailCallArgs[i].FrameIdx;
5222 // Store relative to the frame pointer.
5223 MemOpChains.push_back(DAG.getStore(
5224 Chain, dl, Arg, FIN,
5225 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5226 }
5227}
5228
5229/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5230/// the appropriate stack slot for the tail call optimized function call.
5231static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5232 SDValue OldRetAddr, SDValue OldFP,
5233 int SPDiff, const SDLoc &dl) {
5234 if (SPDiff) {
5235 // Calculate the new stack slot for the return address.
5236 MachineFunction &MF = DAG.getMachineFunction();
5237 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5238 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5239 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5240 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5241 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5242 NewRetAddrLoc, true);
5243 SDValue NewRetAddrFrIdx =
5244 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5245 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5246 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5247 }
5248 return Chain;
5249}
5250
5251/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5252/// the position of the argument.
5253static void CalculateTailCallArgDest(
5254 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5255 int SPDiff, unsigned ArgOffset,
5256 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5257 int Offset = ArgOffset + SPDiff;
5258 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5259 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5260 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5261 SDValue FIN = DAG.getFrameIndex(FI, VT);
5262 TailCallArgumentInfo Info;
5263 Info.Arg = Arg;
5264 Info.FrameIdxOp = FIN;
5265 Info.FrameIdx = FI;
5266 TailCallArguments.push_back(Info);
5267}
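// Worked example (illustrative numbers only): for an i64 argument at
// ArgOffset 112 with SPDiff -32, the code above records an 8-byte fixed
// object at offset 80 from the adjusted stack pointer:
//   OpSize = (64 + 7) / 8;   // == 8 bytes
//   Offset = 112 + (-32);    // == 80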
5268
5269/// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return address
5270/// stack slot. Returns the chain as result and the loaded frame pointers in
5271/// LROpOut/FPOpout. Used when tail calling.
5272SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5273 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5274 SDValue &FPOpOut, const SDLoc &dl) const {
5275 if (SPDiff) {
5276 // Load the LR and FP stack slot for later adjusting.
5277 LROpOut = getReturnAddrFrameIndex(DAG);
5278 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5279 MachinePointerInfo());
5280 Chain = SDValue(LROpOut.getNode(), 1);
5281 }
5282 return Chain;
5283}
5284
5285/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5286/// by "Src" to address "Dst" of size "Size". Alignment information is
5287/// specified by the specific parameter attribute. The copy will be passed as
5288/// a byval function parameter.
5289/// Sometimes what we are copying is the end of a larger object, the part that
5290/// does not fit in registers.
5291static SDValue CreateCopyOfByValArgument(SDValue Arg, SDValue Dst,
5292 SDValue Chain, ISD::ArgFlagsTy Flags,
5293 SelectionDAG &DAG, const SDLoc &dl) {
5294 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5295 return DAG.getMemcpy(
5296 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5297 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5298}
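// Conceptually the node built above behaves like a plain memcpy of the byval
// object (sketch only; Dst, Src and Flags are the parameters above):
//   memcpy(Dst, Src, Flags.getByValSize());   // aligned to getNonZeroByValAlign()
// Callers splice this copy outside the CALLSEQ_START..CALLSEQ_END region so the
// call sequence itself stays minimal.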
5299
5300/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5301/// tail calls.
5302static void LowerMemOpCallTo(
5303 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5304 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5305 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5306 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5307 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5308 if (!isTailCall) {
5309 if (isVector) {
5310 SDValue StackPtr;
5311 if (isPPC64)
5312 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5313 else
5314 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5315 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5316 DAG.getConstant(ArgOffset, dl, PtrVT));
5317 }
5318 MemOpChains.push_back(
5319 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5320 // Calculate and remember argument location.
5321 } else
5322 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5323 TailCallArguments);
5324}
5325
5326static void
5327PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5328 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5329 SDValue FPOp,
5330 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5331 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5332 // might overwrite each other in case of tail call optimization.
5333 SmallVector<SDValue, 8> MemOpChains2;
5334 // Do not flag preceding copytoreg stuff together with the following stuff.
5335 InGlue = SDValue();
5336 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5337 MemOpChains2, dl);
5338 if (!MemOpChains2.empty())
5339 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5340
5341 // Store the return address to the appropriate stack slot.
5342 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5343
5344 // Emit callseq_end just before tailcall node.
5345 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5346 InGlue = Chain.getValue(1);
5347}
5348
5349// Is this global address that of a function that can be called by name? (as
5350// opposed to something that must hold a descriptor for an indirect call).
5351static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5352 if (GV) {
5353 if (GV->isThreadLocal())
5354 return false;
5355
5356 return GV->getValueType()->isFunctionTy();
5357 }
5358
5359 return false;
5360}
5361
5362SDValue PPCTargetLowering::LowerCallResult(
5363 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5364 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5365 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5366 SmallVector<CCValAssign, 16> RVLocs;
5367 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5368 *DAG.getContext());
5369
5370 CCRetInfo.AnalyzeCallResult(
5371 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5372 ? RetCC_PPC_Cold
5373 : RetCC_PPC);
5374
5375 // Copy all of the result registers out of their specified physreg.
5376 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5377 CCValAssign &VA = RVLocs[i];
5378 assert(VA.isRegLoc() && "Can only return in registers!");
5379
5380 SDValue Val;
5381
5382 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5383 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5384 InGlue);
5385 Chain = Lo.getValue(1);
5386 InGlue = Lo.getValue(2);
5387 VA = RVLocs[++i]; // skip ahead to next loc
5388 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5389 InGlue);
5390 Chain = Hi.getValue(1);
5391 InGlue = Hi.getValue(2);
5392 if (!Subtarget.isLittleEndian())
5393 std::swap (Lo, Hi);
5394 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5395 } else {
5396 Val = DAG.getCopyFromReg(Chain, dl,
5397 VA.getLocReg(), VA.getLocVT(), InGlue);
5398 Chain = Val.getValue(1);
5399 InGlue = Val.getValue(2);
5400 }
5401
5402 switch (VA.getLocInfo()) {
5403 default: llvm_unreachable("Unknown loc info!");
5404 case CCValAssign::Full: break;
5405 case CCValAssign::AExt:
5406 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5407 break;
5408 case CCValAssign::ZExt:
5409 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5410 DAG.getValueType(VA.getValVT()));
5411 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5412 break;
5413 case CCValAssign::SExt:
5414 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5415 DAG.getValueType(VA.getValVT()));
5416 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5417 break;
5418 }
5419
5420 InVals.push_back(Val);
5421 }
5422
5423 return Chain;
5424}
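// Host-side sketch (illustrative, not DAG code) of what BUILD_SPE64 above
// represents: two 32-bit register halves are concatenated back into the
// 64-bit bit pattern of the f64, and the std::swap above decides which copy
// ends up as the most-significant word. The helper name is hypothetical.
//
//   #include <cstdint>
//   #include <cstring>
//   static double buildSPE64(uint32_t MostSigWord, uint32_t LeastSigWord) {
//     uint64_t Bits = (uint64_t(MostSigWord) << 32) | LeastSigWord;
//     double D;
//     std::memcpy(&D, &Bits, sizeof(D));  // bit-exact reinterpretation
//     return D;
//   }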
5425
5426static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5427 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5428 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5429 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5430
5431 // PatchPoint calls are not indirect.
5432 if (isPatchPoint)
5433 return false;
5434
5435 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5436 return false;
5437
5438 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5439 // because the immediate function pointer points to a descriptor instead of
5440 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5441 // pointer immediate points to the global entry point, while the BLA would
5442 // need to jump to the local entry point (see rL211174).
5443 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5444 isBLACompatibleAddress(Callee, DAG))
5445 return false;
5446
5447 return true;
5448}
5449
5450// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5451static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5452 return Subtarget.isAIXABI() ||
5453 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5454}
5455
5456static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5457 const Function &Caller, const SDValue &Callee,
5458 const PPCSubtarget &Subtarget,
5459 const TargetMachine &TM,
5460 bool IsStrictFPCall = false) {
5461 if (CFlags.IsTailCall)
5462 return PPCISD::TC_RETURN;
5463
5464 unsigned RetOpc = 0;
5465 // This is a call through a function pointer.
5466 if (CFlags.IsIndirect) {
5467 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5468 // indirect calls. The save of the caller's TOC pointer to the stack will be
5469 // inserted into the DAG as part of call lowering. The restore of the TOC
5470 // pointer is modeled by using a pseudo instruction for the call opcode that
5471 // represents the 2 instruction sequence of an indirect branch and link,
5472 // immediately followed by a load of the TOC pointer from the stack save
5473 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5474 // as it is not saved or used.
5475 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5476 : PPCISD::BCTRL;
5477 } else if (Subtarget.isUsingPCRelativeCalls()) {
5478 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5479 RetOpc = PPCISD::CALL_NOTOC;
5480 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5481 // The ABIs that maintain a TOC pointer across calls need to have a nop
5482 // immediately following the call instruction if the caller and callee may
5483 // have different TOC bases. At link time if the linker determines the calls
5484 // may not share a TOC base, the call is redirected to a trampoline inserted
5485 // by the linker. The trampoline will (among other things) save the caller's
5486 // TOC pointer at an ABI designated offset in the linkage area and the
5487 // linker will rewrite the nop to be a load of the TOC pointer from the
5488 // linkage area into gpr2.
5489 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5490 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5491 RetOpc =
5492 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5493 } else
5494 RetOpc = PPCISD::CALL;
5495 if (IsStrictFPCall) {
5496 switch (RetOpc) {
5497 default:
5498 llvm_unreachable("Unknown call opcode");
5499 case PPCISD::BCTRL_LOAD_TOC:
5500 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5501 break;
5502 case PPCISD::BCTRL:
5503 RetOpc = PPCISD::BCTRL_RM;
5504 break;
5505 case PPCISD::CALL_NOTOC:
5506 RetOpc = PPCISD::CALL_NOTOC_RM;
5507 break;
5508 case PPCISD::CALL:
5509 RetOpc = PPCISD::CALL_RM;
5510 break;
5511 case PPCISD::CALL_NOP:
5512 RetOpc = PPCISD::CALL_NOP_RM;
5513 break;
5514 }
5515 }
5516 return RetOpc;
5517}
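// Summary of the selection above for the non-strict-FP case (tail calls
// always use TC_RETURN):
//   indirect call, TOC save/restore ABI   -> BCTRL_LOAD_TOC
//   indirect call, no TOC restore needed  -> BCTRL
//   direct call with PC-relative addressing -> CALL_NOTOC
//   direct call, caller/callee share a TOC  -> CALL
//   direct call, TOC bases may differ       -> CALL_NOP (nop rewritten by the linker)
//   any other direct call                   -> CALL
// Strict-FP calls are then rewritten to the corresponding *_RM opcode.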
5518
5519static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5520 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5521 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5522 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5523 return SDValue(Dest, 0);
5524
5525 // Returns true if the callee is local, and false otherwise.
5526 auto isLocalCallee = [&]() {
5527 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5528 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5529
5530 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5531 !isa_and_nonnull<GlobalIFunc>(GV);
5532 };
5533
5534 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5535 // a static relocation model causes some versions of GNU LD (2.17.50, at
5536 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5537 // built with secure-PLT.
5538 bool UsePlt =
5539 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5540 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5541
5542 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5543 const TargetMachine &TM = Subtarget.getTargetMachine();
5544 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5545 MCSymbolXCOFF *S =
5546 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5547
5549 return DAG.getMCSymbol(S, PtrVT);
5550 };
5551
5552 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5553 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5554 if (isFunctionGlobalAddress(GV)) {
5555 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5556
5557 if (Subtarget.isAIXABI()) {
5558 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5559 return getAIXFuncEntryPointSymbolSDNode(GV);
5560 }
5561 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5562 UsePlt ? PPCII::MO_PLT : 0);
5563 }
5564
5565 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5566 const char *SymName = S->getSymbol();
5567 if (Subtarget.isAIXABI()) {
5568 // If there exists a user-declared function whose name is the same as the
5569 // ExternalSymbol's, then we pick up the user-declared version.
5571 if (const Function *F =
5572 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5573 return getAIXFuncEntryPointSymbolSDNode(F);
5574
5575 // On AIX, direct function calls reference the symbol for the function's
5576 // entry point, which is named by prepending a "." before the function's
5577 // C-linkage name. A Qualname is returned here because an external
5578 // function entry point is a csect with XTY_ER property.
5579 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5580 auto &Context = DAG.getMachineFunction().getContext();
5581 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5582 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5584 return Sec->getQualNameSymbol();
5585 };
5586
5587 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5588 }
5589 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5590 UsePlt ? PPCII::MO_PLT : 0);
5591 }
5592
5593 // No transformation needed.
5594 assert(Callee.getNode() && "What no callee?");
5595 return Callee;
5596}
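// For example (illustrative): on AIX a direct call to a function "foo" is
// rewritten to reference its entry-point symbol ".foo" (a qualified csect
// symbol when the callee is external), whereas the ELF paths keep "foo" and
// at most tag it with MO_PLT for 32-bit PIC calls.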
5597
5598static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5599 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5600 "Expected a CALLSEQ_STARTSDNode.");
5601
5602 // The last operand is the chain, except when the node has glue. If the node
5603 // has glue, then the last operand is the glue, and the chain is the second
5604 // last operand.
5605 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5606 if (LastValue.getValueType() != MVT::Glue)
5607 return LastValue;
5608
5609 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5610}
5611
5612// Creates the node that moves a function's address into the count register
5613// to prepare for an indirect call instruction.
5614static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5615 SDValue &Glue, SDValue &Chain,
5616 const SDLoc &dl) {
5617 SDValue MTCTROps[] = {Chain, Callee, Glue};
5618 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5619 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5620 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5621 // The glue is the second value produced.
5622 Glue = Chain.getValue(1);
5623}
5624
5625static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5626 SDValue &Glue, SDValue &Chain,
5627 SDValue CallSeqStart,
5628 const CallBase *CB, const SDLoc &dl,
5629 bool hasNest,
5630 const PPCSubtarget &Subtarget) {
5631 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5632 // entry point, but to the function descriptor (the function entry point
5633 // address is part of the function descriptor though).
5634 // The function descriptor is a three doubleword structure with the
5635 // following fields: function entry point, TOC base address and
5636 // environment pointer.
5637 // Thus for a call through a function pointer, the following actions need
5638 // to be performed:
5639 // 1. Save the TOC of the caller in the TOC save area of its stack
5640 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5641 // 2. Load the address of the function entry point from the function
5642 // descriptor.
5643 // 3. Load the TOC of the callee from the function descriptor into r2.
5644 // 4. Load the environment pointer from the function descriptor into
5645 // r11.
5646 // 5. Branch to the function entry point address.
5647 // 6. On return of the callee, the TOC of the caller needs to be
5648 // restored (this is done in FinishCall()).
5649 //
5650 // The loads are scheduled at the beginning of the call sequence, and the
5651 // register copies are flagged together to ensure that no other
5652 // operations can be scheduled in between. E.g. without flagging the
5653 // copies together, a TOC access in the caller could be scheduled between
5654 // the assignment of the callee TOC and the branch to the callee, which leads
5655 // to incorrect code.
5656
5657 // Start by loading the function address from the descriptor.
5658 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5659 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5660 ? (MachineMemOperand::MODereferenceable |
5661 MachineMemOperand::MOInvariant)
5662 : MachineMemOperand::MONone;
5663
5664 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5665
5666 // Registers used in building the DAG.
5667 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5668 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5669
5670 // Offsets of descriptor members.
5671 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5672 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5673
5674 const MVT RegVT = Subtarget.getScalarIntVT();
5675 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5676
5677 // One load for the functions entry point address.
5678 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5679 Alignment, MMOFlags);
5680
5681 // One for loading the TOC anchor for the module that contains the called
5682 // function.
5683 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5684 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5685 SDValue TOCPtr =
5686 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5687 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5688
5689 // One for loading the environment pointer.
5690 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5691 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5692 SDValue LoadEnvPtr =
5693 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5694 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5695
5696
5697 // Then copy the newly loaded TOC anchor to the TOC pointer.
5698 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5699 Chain = TOCVal.getValue(0);
5700 Glue = TOCVal.getValue(1);
5701
5702 // If the function call has an explicit 'nest' parameter, it takes the
5703 // place of the environment pointer.
5704 assert((!hasNest || !Subtarget.isAIXABI()) &&
5705 "Nest parameter is not supported on AIX.");
5706 if (!hasNest) {
5707 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5708 Chain = EnvVal.getValue(0);
5709 Glue = EnvVal.getValue(1);
5710 }
5711
5712 // The rest of the indirect call sequence is the same as the non-descriptor
5713 // DAG.
5714 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5715}
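// Layout walked by the three loads above, shown as a plain struct (names are
// illustrative; the non-zero offsets come from the Subtarget descriptor hooks):
//
//   struct FunctionDescriptor {
//     uintptr_t EntryPoint;  // offset 0: moved into CTR for the branch
//     uintptr_t TOCAnchor;   // descriptorTOCAnchorOffset(): copied to the TOC register
//     uintptr_t EnvPointer;  // descriptorEnvironmentPointerOffset(): copied to r11 unless 'nest' is used
//   };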
5716
5717static void
5718buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5719 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5720 SelectionDAG &DAG,
5721 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5722 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5723 const PPCSubtarget &Subtarget) {
5724 const bool IsPPC64 = Subtarget.isPPC64();
5725 // MVT for a general purpose register.
5726 const MVT RegVT = Subtarget.getScalarIntVT();
5727
5728 // First operand is always the chain.
5729 Ops.push_back(Chain);
5730
5731 // If it's a direct call pass the callee as the second operand.
5732 if (!CFlags.IsIndirect)
5733 Ops.push_back(Callee);
5734 else {
5735 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5736
5737 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5738 // on the stack (this would have been done in `LowerCall_64SVR4` or
5739 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5740 // represents both the indirect branch and a load that restores the TOC
5741 // pointer from the linkage area. The operand for the TOC restore is an add
5742 // of the TOC save offset to the stack pointer. This must be the second
5743 // operand: after the chain input but before any other variadic arguments.
5744 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5745 // saved or used.
5746 if (isTOCSaveRestoreRequired(Subtarget)) {
5747 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5748
5749 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5750 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5751 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5752 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5753 Ops.push_back(AddTOC);
5754 }
5755
5756 // Add the register used for the environment pointer.
5757 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5758 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5759 RegVT));
5760
5761
5762 // Add CTR register as callee so a bctr can be emitted later.
5763 if (CFlags.IsTailCall)
5764 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5765 }
5766
5767 // If this is a tail call add stack pointer delta.
5768 if (CFlags.IsTailCall)
5769 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5770
5771 // Add argument registers to the end of the list so that they are known live
5772 // into the call.
5773 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5774 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5775 RegsToPass[i].second.getValueType()));
5776
5777 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5778 // no way to mark dependencies as implicit here.
5779 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5780 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5781 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5782 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5783
5784 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5785 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5786 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5787
5788 // Add a register mask operand representing the call-preserved registers.
5789 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5790 const uint32_t *Mask =
5791 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5792 assert(Mask && "Missing call preserved mask for calling convention");
5793 Ops.push_back(DAG.getRegisterMask(Mask));
5794
5795 // If the glue is valid, it is the last operand.
5796 if (Glue.getNode())
5797 Ops.push_back(Glue);
5798}
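// Resulting operand order, for reference (entries in brackets are only present
// when the corresponding condition above holds):
//   Chain, Callee | [TOC-restore ADD, env-ptr reg, CTR], [SPDiff],
//   argument registers..., [TOC reg], [CR1EQ], register mask, [Glue]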
5799
5800SDValue PPCTargetLowering::FinishCall(
5801 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5802 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5803 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5804 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5805 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5806
5807 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5808 Subtarget.isAIXABI())
5809 setUsesTOCBasePtr(DAG);
5810
5811 unsigned CallOpc =
5812 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5813 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5814
5815 if (!CFlags.IsIndirect)
5816 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5817 else if (Subtarget.usesFunctionDescriptors())
5818 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5819 dl, CFlags.HasNest, Subtarget);
5820 else
5821 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5822
5823 // Build the operand list for the call instruction.
5824 SmallVector<SDValue, 8> Ops;
5825 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5826 SPDiff, Subtarget);
5827
5828 // Emit tail call.
5829 if (CFlags.IsTailCall) {
5830 // Indirect tail call when using PC Relative calls do not have the same
5831 // constraints.
5832 assert(((Callee.getOpcode() == ISD::Register &&
5833 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5834 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5835 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5836 isa<ConstantSDNode>(Callee) ||
5837 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5838 "Expecting a global address, external symbol, absolute value, "
5839 "register or an indirect tail call when PC Relative calls are "
5840 "used.");
5841 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5842 assert(CallOpc == PPCISD::TC_RETURN &&
5843 "Unexpected call opcode for a tail call.");
5844 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5845 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5846 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5847 return Ret;
5848 }
5849
5850 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5851 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5852 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5853 Glue = Chain.getValue(1);
5854
5855 // When performing tail call optimization the callee pops its arguments off
5856 // the stack. Account for this here so these bytes can be pushed back on in
5857 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5858 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5859 getTargetMachine().Options.GuaranteedTailCallOpt)
5860 ? NumBytes
5861 : 0;
5862
5863 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5864 Glue = Chain.getValue(1);
5865
5866 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5867 DAG, InVals);
5868}
5869
5870bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5871 CallingConv::ID CalleeCC = CB->getCallingConv();
5872 const Function *CallerFunc = CB->getCaller();
5873 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5874 const Function *CalleeFunc = CB->getCalledFunction();
5875 if (!CalleeFunc)
5876 return false;
5877 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5878
5879 SmallVector<ISD::OutputArg, 2> Outs;
5880 SmallVector<ISD::InputArg, 2> Ins;
5881
5882 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5883 CalleeFunc->getAttributes(), Outs, *this,
5884 CalleeFunc->getDataLayout());
5885
5886 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5887 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5888 false /*isCalleeExternalSymbol*/);
5889}
5890
5891bool PPCTargetLowering::isEligibleForTCO(
5892 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5893 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5894 const SmallVectorImpl<ISD::OutputArg> &Outs,
5895 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5896 bool isCalleeExternalSymbol) const {
5897 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5898 return false;
5899
5900 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5901 return IsEligibleForTailCallOptimization_64SVR4(
5902 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5903 isCalleeExternalSymbol);
5904 else
5905 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5906 isVarArg, Ins);
5907}
5908
5909SDValue
5910PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5911 SmallVectorImpl<SDValue> &InVals) const {
5912 SelectionDAG &DAG = CLI.DAG;
5913 SDLoc &dl = CLI.DL;
5914 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5915 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5916 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5917 SDValue Chain = CLI.Chain;
5918 SDValue Callee = CLI.Callee;
5919 bool &isTailCall = CLI.IsTailCall;
5920 CallingConv::ID CallConv = CLI.CallConv;
5921 bool isVarArg = CLI.IsVarArg;
5922 bool isPatchPoint = CLI.IsPatchPoint;
5923 const CallBase *CB = CLI.CB;
5924
5925 if (isTailCall) {
5926 MachineFunction &MF = DAG.getMachineFunction();
5927 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5928 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5929 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5930 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5931
5932 isTailCall =
5933 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5934 &(MF.getFunction()), IsCalleeExternalSymbol);
5935 if (isTailCall) {
5936 ++NumTailCalls;
5937 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5938 ++NumSiblingCalls;
5939
5940 // PC Relative calls no longer guarantee that the callee is a Global
5941 // Address Node. The callee could be an indirect tail call in which
5942 // case the SDValue for the callee could be a load (to load the address
5943 // of a function pointer) or it may be a register copy (to move the
5944 // address of the callee from a function parameter into a virtual
5945 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5946 assert((Subtarget.isUsingPCRelativeCalls() ||
5947 isa<GlobalAddressSDNode>(Callee)) &&
5948 "Callee should be an llvm::Function object.");
5949
5950 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5951 << "\nTCO callee: ");
5952 LLVM_DEBUG(Callee.dump());
5953 }
5954 }
5955
5956 if (!isTailCall && CB && CB->isMustTailCall())
5957 report_fatal_error("failed to perform tail call elimination on a call "
5958 "site marked musttail");
5959
5960 // When long calls (i.e. indirect calls) are always used, calls are always
5961 // made via function pointer. If we have a function name, first translate it
5962 // into a pointer.
5963 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5964 !isTailCall)
5965 Callee = LowerGlobalAddress(Callee, DAG);
5966
5967 CallFlags CFlags(
5968 CallConv, isTailCall, isVarArg, isPatchPoint,
5969 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5970 // hasNest
5971 Subtarget.is64BitELFABI() &&
5972 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5973 CLI.NoMerge);
5974
5975 if (Subtarget.isAIXABI())
5976 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5977 InVals, CB);
5978
5979 assert(Subtarget.isSVR4ABI());
5980 if (Subtarget.isPPC64())
5981 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5982 InVals, CB);
5983 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5984 InVals, CB);
5985}
5986
5987SDValue PPCTargetLowering::LowerCall_32SVR4(
5988 SDValue Chain, SDValue Callee, CallFlags CFlags,
5989 const SmallVectorImpl<ISD::OutputArg> &Outs,
5990 const SmallVectorImpl<SDValue> &OutVals,
5991 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5992 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5993 const CallBase *CB) const {
5994 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5995 // of the 32-bit SVR4 ABI stack frame layout.
5996
5997 const CallingConv::ID CallConv = CFlags.CallConv;
5998 const bool IsVarArg = CFlags.IsVarArg;
5999 const bool IsTailCall = CFlags.IsTailCall;
6000
6001 assert((CallConv == CallingConv::C ||
6002 CallConv == CallingConv::Cold ||
6003 CallConv == CallingConv::Fast) && "Unknown calling convention!");
6004
6005 const Align PtrAlign(4);
6006
6007 MachineFunction &MF = DAG.getMachineFunction();
6008
6009 // Mark this function as potentially containing a tail call. As a consequence
6010 // the frame pointer will be used for dynamic allocation and for restoring the
6011 // caller's stack pointer in this function's epilogue. This is done because,
6012 // by tail calling, the called function might overwrite the value in this
6013 // function's (MF) stack pointer stack slot 0(SP).
6014 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6015 CallConv == CallingConv::Fast)
6016 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6017
6018 // Count how many bytes are to be pushed on the stack, including the linkage
6019 // area, parameter list area and the part of the local variable space which
6020 // contains copies of aggregates which are passed by value.
6021
6022 // Assign locations to all of the outgoing arguments.
6023 SmallVector<CCValAssign, 16> ArgLocs;
6024 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6025
6026 // Reserve space for the linkage area on the stack.
6027 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6028 PtrAlign);
6029 if (useSoftFloat())
6030 CCInfo.PreAnalyzeCallOperands(Outs);
6031
6032 if (IsVarArg) {
6033 // Handle fixed and variable vector arguments differently.
6034 // Fixed vector arguments go into registers as long as registers are
6035 // available. Variable vector arguments always go into memory.
6036 unsigned NumArgs = Outs.size();
6037
6038 for (unsigned i = 0; i != NumArgs; ++i) {
6039 MVT ArgVT = Outs[i].VT;
6040 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6041 bool Result;
6042
6043 if (Outs[i].IsFixed) {
6044 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6045 CCInfo);
6046 } else {
6047 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6048 ArgFlags, CCInfo);
6049 }
6050
6051 if (Result) {
6052#ifndef NDEBUG
6053 errs() << "Call operand #" << i << " has unhandled type "
6054 << ArgVT << "\n";
6055#endif
6056 llvm_unreachable(nullptr);
6057 }
6058 }
6059 } else {
6060 // All arguments are treated the same.
6061 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6062 }
6063 CCInfo.clearWasPPCF128();
6064
6065 // Assign locations to all of the outgoing aggregate by value arguments.
6066 SmallVector<CCValAssign, 16> ByValArgLocs;
6067 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6068
6069 // Reserve stack space for the allocations in CCInfo.
6070 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6071
6072 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6073
6074 // Size of the linkage area, parameter list area and the part of the local
6075 // variable space where copies of aggregates which are passed by value are
6076 // stored.
6077 unsigned NumBytes = CCByValInfo.getStackSize();
6078
6079 // Calculate by how many bytes the stack has to be adjusted in case of tail
6080 // call optimization.
6081 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6082
6083 // Adjust the stack pointer for the new arguments...
6084 // These operations are automatically eliminated by the prolog/epilog pass
6085 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6086 SDValue CallSeqStart = Chain;
6087
6088 // Load the return address and frame pointer so they can be moved somewhere else
6089 // later.
6090 SDValue LROp, FPOp;
6091 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6092
6093 // Set up a copy of the stack pointer for use loading and storing any
6094 // arguments that may not fit in the registers available for argument
6095 // passing.
6096 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6097
6098 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6099 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6100 SmallVector<SDValue, 8> MemOpChains;
6101
6102 bool seenFloatArg = false;
6103 // Walk the register/memloc assignments, inserting copies/loads.
6104 // i - Tracks the index into the list of registers allocated for the call
6105 // RealArgIdx - Tracks the index into the list of actual function arguments
6106 // j - Tracks the index into the list of byval arguments
6107 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6108 i != e;
6109 ++i, ++RealArgIdx) {
6110 CCValAssign &VA = ArgLocs[i];
6111 SDValue Arg = OutVals[RealArgIdx];
6112 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6113
6114 if (Flags.isByVal()) {
6115 // Argument is an aggregate which is passed by value, thus we need to
6116 // create a copy of it in the local variable space of the current stack
6117 // frame (which is the stack frame of the caller) and pass the address of
6118 // this copy to the callee.
6119 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6120 CCValAssign &ByValVA = ByValArgLocs[j++];
6121 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6122
6123 // Memory reserved in the local variable space of the caller's stack frame.
6124 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6125
6126 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6127 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6128 StackPtr, PtrOff);
6129
6130 // Create a copy of the argument in the local area of the current
6131 // stack frame.
6132 SDValue MemcpyCall =
6133 CreateCopyOfByValArgument(Arg, PtrOff,
6134 CallSeqStart.getNode()->getOperand(0),
6135 Flags, DAG, dl);
6136
6137 // This must go outside the CALLSEQ_START..END.
6138 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6139 SDLoc(MemcpyCall));
6140 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6141 NewCallSeqStart.getNode());
6142 Chain = CallSeqStart = NewCallSeqStart;
6143
6144 // Pass the address of the aggregate copy on the stack either in a
6145 // physical register or in the parameter list area of the current stack
6146 // frame to the callee.
6147 Arg = PtrOff;
6148 }
6149
6150 // When useCRBits() is true, there can be i1 arguments.
6151 // This is because getRegisterType(MVT::i1) => MVT::i1, while for other
6152 // integer types getRegisterType() => MVT::i32.
6153 // Extend i1 so the callee will get an i32.
6154 if (Arg.getValueType() == MVT::i1)
6155 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6156 dl, MVT::i32, Arg);
6157
6158 if (VA.isRegLoc()) {
6159 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6160 // Put argument in a physical register.
6161 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6162 bool IsLE = Subtarget.isLittleEndian();
6163 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6164 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6165 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6166 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6167 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6168 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6169 SVal.getValue(0)));
6170 } else
6171 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6172 } else {
6173 // Put argument in the parameter list area of the current stack frame.
6174 assert(VA.isMemLoc());
6175 unsigned LocMemOffset = VA.getLocMemOffset();
6176
6177 if (!IsTailCall) {
6178 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6179 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6180 StackPtr, PtrOff);
6181
6182 MemOpChains.push_back(
6183 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6184 } else {
6185 // Calculate and remember argument location.
6186 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6187 TailCallArguments);
6188 }
6189 }
6190 }
6191
6192 if (!MemOpChains.empty())
6193 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6194
6195 // Build a sequence of copy-to-reg nodes chained together with token chain
6196 // and flag operands which copy the outgoing args into the appropriate regs.
6197 SDValue InGlue;
6198 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6199 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6200 RegsToPass[i].second, InGlue);
6201 InGlue = Chain.getValue(1);
6202 }
6203
6204 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6205 // registers.
6206 if (IsVarArg) {
6207 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6208 SDValue Ops[] = { Chain, InGlue };
6209
6210 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6211 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6212
6213 InGlue = Chain.getValue(1);
6214 }
6215
6216 if (IsTailCall)
6217 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6218 TailCallArguments);
6219
6220 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6221 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6222}
6223
6224// Copy an argument into memory, being careful to do this outside the
6225// call sequence for the call to which the argument belongs.
6226SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6227 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6228 SelectionDAG &DAG, const SDLoc &dl) const {
6229 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6230 CallSeqStart.getNode()->getOperand(0),
6231 Flags, DAG, dl);
6232 // The MEMCPY must go outside the CALLSEQ_START..END.
6233 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6234 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6235 SDLoc(MemcpyCall));
6236 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6237 NewCallSeqStart.getNode());
6238 return NewCallSeqStart;
6239}
6240
6241SDValue PPCTargetLowering::LowerCall_64SVR4(
6242 SDValue Chain, SDValue Callee, CallFlags CFlags,
6243 const SmallVectorImpl<ISD::OutputArg> &Outs,
6244 const SmallVectorImpl<SDValue> &OutVals,
6245 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6246 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6247 const CallBase *CB) const {
6248 bool isELFv2ABI = Subtarget.isELFv2ABI();
6249 bool isLittleEndian = Subtarget.isLittleEndian();
6250 unsigned NumOps = Outs.size();
6251 bool IsSibCall = false;
6252 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6253
6254 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6255 unsigned PtrByteSize = 8;
6256
6257 MachineFunction &MF = DAG.getMachineFunction();
6258
6259 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6260 IsSibCall = true;
6261
6262 // Mark this function as potentially containing a tail call. As a consequence
6263 // the frame pointer will be used for dynamic allocation and for restoring the
6264 // caller's stack pointer in this function's epilogue. This is done because,
6265 // by tail calling, the called function might overwrite the value in this
6266 // function's (MF) stack pointer stack slot 0(SP).
6267 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6268 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6269
6270 assert(!(IsFastCall && CFlags.IsVarArg) &&
6271 "fastcc not supported on varargs functions");
6272
6273 // Count how many bytes are to be pushed on the stack, including the linkage
6274 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6275 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6276 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6277 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6278 unsigned NumBytes = LinkageSize;
6279 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6280
6281 static const MCPhysReg GPR[] = {
6282 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6283 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6284 };
6285 static const MCPhysReg VR[] = {
6286 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6287 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6288 };
6289
6290 const unsigned NumGPRs = std::size(GPR);
6291 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6292 const unsigned NumVRs = std::size(VR);
6293
6294 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6295 // can be passed to the callee in registers.
6296 // For the fast calling convention, there is another check below.
6297 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6298 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6299 if (!HasParameterArea) {
6300 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6301 unsigned AvailableFPRs = NumFPRs;
6302 unsigned AvailableVRs = NumVRs;
6303 unsigned NumBytesTmp = NumBytes;
6304 for (unsigned i = 0; i != NumOps; ++i) {
6305 if (Outs[i].Flags.isNest()) continue;
6306 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6307 PtrByteSize, LinkageSize, ParamAreaSize,
6308 NumBytesTmp, AvailableFPRs, AvailableVRs))
6309 HasParameterArea = true;
6310 }
6311 }
6312
6313 // When using the fast calling convention, we don't provide backing for
6314 // arguments that will be in registers.
6315 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6316
6317 // Avoid allocating parameter area for fastcc functions if all the arguments
6318 // can be passed in the registers.
6319 if (IsFastCall)
6320 HasParameterArea = false;
6321
6322 // Add up all the space actually used.
6323 for (unsigned i = 0; i != NumOps; ++i) {
6324 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6325 EVT ArgVT = Outs[i].VT;
6326 EVT OrigVT = Outs[i].ArgVT;
6327
6328 if (Flags.isNest())
6329 continue;
6330
6331 if (IsFastCall) {
6332 if (Flags.isByVal()) {
6333 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6334 if (NumGPRsUsed > NumGPRs)
6335 HasParameterArea = true;
6336 } else {
6337 switch (ArgVT.getSimpleVT().SimpleTy) {
6338 default: llvm_unreachable("Unexpected ValueType for argument!");
6339 case MVT::i1:
6340 case MVT::i32:
6341 case MVT::i64:
6342 if (++NumGPRsUsed <= NumGPRs)
6343 continue;
6344 break;
6345 case MVT::v4i32:
6346 case MVT::v8i16:
6347 case MVT::v16i8:
6348 case MVT::v2f64:
6349 case MVT::v2i64:
6350 case MVT::v1i128:
6351 case MVT::f128:
6352 if (++NumVRsUsed <= NumVRs)
6353 continue;
6354 break;
6355 case MVT::v4f32:
6356 if (++NumVRsUsed <= NumVRs)
6357 continue;
6358 break;
6359 case MVT::f32:
6360 case MVT::f64:
6361 if (++NumFPRsUsed <= NumFPRs)
6362 continue;
6363 break;
6364 }
6365 HasParameterArea = true;
6366 }
6367 }
6368
6369 /* Respect alignment of argument on the stack. */
6370 auto Alignment =
6371 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6372 NumBytes = alignTo(NumBytes, Alignment);
6373
6374 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6375 if (Flags.isInConsecutiveRegsLast())
6376 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6377 }
6378
6379 unsigned NumBytesActuallyUsed = NumBytes;
6380
6381 // In the old ELFv1 ABI,
6382 // the prolog code of the callee may store up to 8 GPR argument registers to
6383 // the stack, allowing va_start to index over them in memory if it is varargs.
6384 // Because we cannot tell if this is needed on the caller side, we have to
6385 // conservatively assume that it is needed. As such, make sure we have at
6386 // least enough stack space for the caller to store the 8 GPRs.
6387 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6388 // really requires memory operands, e.g. a vararg function.
6389 if (HasParameterArea)
6390 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6391 else
6392 NumBytes = LinkageSize;
6393
6394 // Tail call needs the stack to be aligned.
6395 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6396 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6397
6398 int SPDiff = 0;
6399
6400 // Calculate by how many bytes the stack has to be adjusted in case of tail
6401 // call optimization.
6402 if (!IsSibCall)
6403 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6404
6405 // To protect arguments on the stack from being clobbered in a tail call,
6406 // force all the loads to happen before doing any other lowering.
6407 if (CFlags.IsTailCall)
6408 Chain = DAG.getStackArgumentTokenFactor(Chain);
6409
6410 // Adjust the stack pointer for the new arguments...
6411 // These operations are automatically eliminated by the prolog/epilog pass
6412 if (!IsSibCall)
6413 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6414 SDValue CallSeqStart = Chain;
6415
6416 // Load the return address and frame pointer so they can be moved somewhere else
6417 // later.
6418 SDValue LROp, FPOp;
6419 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6420
6421 // Set up a copy of the stack pointer for use loading and storing any
6422 // arguments that may not fit in the registers available for argument
6423 // passing.
6424 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6425
6426 // Figure out which arguments are going to go in registers, and which in
6427 // memory. Also, if this is a vararg function, floating point operations
6428 // must be stored to our stack, and loaded into integer regs as well, if
6429 // any integer regs are available for argument passing.
6430 unsigned ArgOffset = LinkageSize;
6431
6432 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6433 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6434
6435 SmallVector<SDValue, 8> MemOpChains;
6436 for (unsigned i = 0; i != NumOps; ++i) {
6437 SDValue Arg = OutVals[i];
6438 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6439 EVT ArgVT = Outs[i].VT;
6440 EVT OrigVT = Outs[i].ArgVT;
6441
6442 // PtrOff will be used to store the current argument to the stack if a
6443 // register cannot be found for it.
6444 SDValue PtrOff;
6445
6446 // We re-align the argument offset for each argument, except when using the
6447 // fast calling convention, when we need to make sure we do that only when
6448 // we'll actually use a stack slot.
6449 auto ComputePtrOff = [&]() {
6450 /* Respect alignment of argument on the stack. */
6451 auto Alignment =
6452 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6453 ArgOffset = alignTo(ArgOffset, Alignment);
6454
6455 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6456
6457 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6458 };
6459
6460 if (!IsFastCall) {
6461 ComputePtrOff();
6462
6463 /* Compute GPR index associated with argument offset. */
6464 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6465 GPR_idx = std::min(GPR_idx, NumGPRs);
6466 }
6467
6468 // Promote integers to 64-bit values.
6469 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6470 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6471 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6472 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6473 }
6474
6475 // FIXME memcpy is used way more than necessary. Correctness first.
6476 // Note: "by value" is code for passing a structure by value, not
6477 // basic types.
6478 if (Flags.isByVal()) {
6479 // Note: Size includes alignment padding, so
6480 // struct x { short a; char b; }
6481 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6482 // These are the proper values we need for right-justifying the
6483 // aggregate in a parameter register.
6484 unsigned Size = Flags.getByValSize();
6485
6486 // An empty aggregate parameter takes up no storage and no
6487 // registers.
6488 if (Size == 0)
6489 continue;
6490
6491 if (IsFastCall)
6492 ComputePtrOff();
6493
6494 // All aggregates smaller than 8 bytes must be passed right-justified.
6495 if (Size==1 || Size==2 || Size==4) {
6496 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6497 if (GPR_idx != NumGPRs) {
6498 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6499 MachinePointerInfo(), VT);
6500 MemOpChains.push_back(Load.getValue(1));
6501 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6502
6503 ArgOffset += PtrByteSize;
6504 continue;
6505 }
6506 }
6507
6508 if (GPR_idx == NumGPRs && Size < 8) {
6509 SDValue AddPtr = PtrOff;
6510 if (!isLittleEndian) {
6511 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6512 PtrOff.getValueType());
6513 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6514 }
6515 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6516 CallSeqStart,
6517 Flags, DAG, dl);
6518 ArgOffset += PtrByteSize;
6519 continue;
6520 }
6521 // Copy the object to the parameter save area if it cannot be entirely passed
6522 // by registers.
6523 // FIXME: we only need to copy the parts which need to be passed in
6524 // parameter save area. For the parts passed by registers, we don't need
6525 // to copy them to the stack although we need to allocate space for them
6526 // in parameter save area.
6527 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6528 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6529 CallSeqStart,
6530 Flags, DAG, dl);
6531
6532 // When a register is available, pass a small aggregate right-justified.
6533 if (Size < 8 && GPR_idx != NumGPRs) {
6534 // The easiest way to get this right-justified in a register
6535 // is to copy the structure into the rightmost portion of a
6536 // local variable slot, then load the whole slot into the
6537 // register.
6538 // FIXME: The memcpy seems to produce pretty awful code for
6539 // small aggregates, particularly for packed ones.
6540 // FIXME: It would be preferable to use the slot in the
6541 // parameter save area instead of a new local variable.
6542 SDValue AddPtr = PtrOff;
6543 if (!isLittleEndian) {
6544 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6545 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6546 }
6547 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6548 CallSeqStart,
6549 Flags, DAG, dl);
6550
6551 // Load the slot into the register.
6552 SDValue Load =
6553 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6554 MemOpChains.push_back(Load.getValue(1));
6555 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6556
6557 // Done with this argument.
6558 ArgOffset += PtrByteSize;
6559 continue;
6560 }
6561
6562 // For aggregates larger than PtrByteSize, copy the pieces of the
6563 // object that fit into registers from the parameter save area.
6564 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6565 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6566 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6567 if (GPR_idx != NumGPRs) {
6568 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6569 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6570 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6571 MachinePointerInfo(), ObjType);
6572
6573 MemOpChains.push_back(Load.getValue(1));
6574 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6575 ArgOffset += PtrByteSize;
6576 } else {
6577 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6578 break;
6579 }
6580 }
6581 continue;
6582 }
6583
6584 switch (Arg.getSimpleValueType().SimpleTy) {
6585 default: llvm_unreachable("Unexpected ValueType for argument!");
6586 case MVT::i1:
6587 case MVT::i32:
6588 case MVT::i64:
6589 if (Flags.isNest()) {
6590 // The 'nest' parameter, if any, is passed in R11.
6591 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6592 break;
6593 }
6594
6595 // These can be scalar arguments or elements of an integer array type
6596 // passed directly. Clang may use those instead of "byval" aggregate
6597 // types to avoid forcing arguments to memory unnecessarily.
6598 if (GPR_idx != NumGPRs) {
6599 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6600 } else {
6601 if (IsFastCall)
6602 ComputePtrOff();
6603
6604 assert(HasParameterArea &&
6605 "Parameter area must exist to pass an argument in memory.");
6606 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6607 true, CFlags.IsTailCall, false, MemOpChains,
6608 TailCallArguments, dl);
6609 if (IsFastCall)
6610 ArgOffset += PtrByteSize;
6611 }
6612 if (!IsFastCall)
6613 ArgOffset += PtrByteSize;
6614 break;
6615 case MVT::f32:
6616 case MVT::f64: {
6617 // These can be scalar arguments or elements of a float array type
6618       // passed directly. The latter are used to implement ELFv2 homogeneous
6619 // float aggregates.
6620
6621 // Named arguments go into FPRs first, and once they overflow, the
6622 // remaining arguments go into GPRs and then the parameter save area.
6623 // Unnamed arguments for vararg functions always go to GPRs and
6624       // then the parameter save area. For now, always pass arguments to vararg
6625       // routines in both locations (FPR *and* GPR or stack slot).
6626 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6627 bool NeededLoad = false;
6628
6629 // First load the argument into the next available FPR.
6630 if (FPR_idx != NumFPRs)
6631 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6632
6633 // Next, load the argument into GPR or stack slot if needed.
6634 if (!NeedGPROrStack)
6635 ;
6636 else if (GPR_idx != NumGPRs && !IsFastCall) {
6637 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6638 // once we support fp <-> gpr moves.
6639
6640 // In the non-vararg case, this can only ever happen in the
6641 // presence of f32 array types, since otherwise we never run
6642 // out of FPRs before running out of GPRs.
6643 SDValue ArgVal;
6644
6645 // Double values are always passed in a single GPR.
6646 if (Arg.getValueType() != MVT::f32) {
6647 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6648
6649 // Non-array float values are extended and passed in a GPR.
6650 } else if (!Flags.isInConsecutiveRegs()) {
6651 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6652 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6653
6654 // If we have an array of floats, we collect every odd element
6655 // together with its predecessor into one GPR.
6656 } else if (ArgOffset % PtrByteSize != 0) {
6657 SDValue Lo, Hi;
6658 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6659 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6660 if (!isLittleEndian)
6661 std::swap(Lo, Hi);
6662 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6663
6664 // The final element, if even, goes into the first half of a GPR.
6665 } else if (Flags.isInConsecutiveRegsLast()) {
6666 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6667 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6668 if (!isLittleEndian)
6669 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6670 DAG.getConstant(32, dl, MVT::i32));
6671
6672 // Non-final even elements are skipped; they will be handled
6673       // together with the subsequent argument on the next go-around.
6674 } else
6675 ArgVal = SDValue();
6676
6677 if (ArgVal.getNode())
6678 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6679 } else {
6680 if (IsFastCall)
6681 ComputePtrOff();
6682
6683 // Single-precision floating-point values are mapped to the
6684 // second (rightmost) word of the stack doubleword.
6685 if (Arg.getValueType() == MVT::f32 &&
6686 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6687 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6688 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6689 }
6690
6691 assert(HasParameterArea &&
6692 "Parameter area must exist to pass an argument in memory.");
6693 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6694 true, CFlags.IsTailCall, false, MemOpChains,
6695 TailCallArguments, dl);
6696
6697 NeededLoad = true;
6698 }
6699 // When passing an array of floats, the array occupies consecutive
6700 // space in the argument area; only round up to the next doubleword
6701 // at the end of the array. Otherwise, each float takes 8 bytes.
6702 if (!IsFastCall || NeededLoad) {
6703 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6704 Flags.isInConsecutiveRegs()) ? 4 : 8;
6705 if (Flags.isInConsecutiveRegsLast())
6706 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6707 }
6708 break;
6709 }
6710 case MVT::v4f32:
6711 case MVT::v4i32:
6712 case MVT::v8i16:
6713 case MVT::v16i8:
6714 case MVT::v2f64:
6715 case MVT::v2i64:
6716 case MVT::v1i128:
6717 case MVT::f128:
6718 // These can be scalar arguments or elements of a vector array type
6719     // passed directly. The latter are used to implement ELFv2 homogeneous
6720 // vector aggregates.
6721
6722 // For a varargs call, named arguments go into VRs or on the stack as
6723 // usual; unnamed arguments always go to the stack or the corresponding
6724 // GPRs when within range. For now, we always put the value in both
6725 // locations (or even all three).
6726 if (CFlags.IsVarArg) {
6727 assert(HasParameterArea &&
6728 "Parameter area must exist if we have a varargs call.");
6729 // We could elide this store in the case where the object fits
6730 // entirely in R registers. Maybe later.
6731 SDValue Store =
6732 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6733 MemOpChains.push_back(Store);
6734 if (VR_idx != NumVRs) {
6735 SDValue Load =
6736 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6737 MemOpChains.push_back(Load.getValue(1));
6738 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6739 }
6740 ArgOffset += 16;
6741 for (unsigned i=0; i<16; i+=PtrByteSize) {
6742 if (GPR_idx == NumGPRs)
6743 break;
6744 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6745 DAG.getConstant(i, dl, PtrVT));
6746 SDValue Load =
6747 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6748 MemOpChains.push_back(Load.getValue(1));
6749 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6750 }
6751 break;
6752 }
6753
6754 // Non-varargs Altivec params go into VRs or on the stack.
6755 if (VR_idx != NumVRs) {
6756 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6757 } else {
6758 if (IsFastCall)
6759 ComputePtrOff();
6760
6761 assert(HasParameterArea &&
6762 "Parameter area must exist to pass an argument in memory.");
6763 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6764 true, CFlags.IsTailCall, true, MemOpChains,
6765 TailCallArguments, dl);
6766 if (IsFastCall)
6767 ArgOffset += 16;
6768 }
6769
6770 if (!IsFastCall)
6771 ArgOffset += 16;
6772 break;
6773 }
6774 }
6775
6776 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6777 "mismatch in size of parameter area");
6778 (void)NumBytesActuallyUsed;
6779
6780 if (!MemOpChains.empty())
6781 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6782
6783 // Check if this is an indirect call (MTCTR/BCTRL).
6784 // See prepareDescriptorIndirectCall and buildCallOperands for more
6785 // information about calls through function pointers in the 64-bit SVR4 ABI.
6786 if (CFlags.IsIndirect) {
6787 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6788 // caller in the TOC save area.
6789 if (isTOCSaveRestoreRequired(Subtarget)) {
6790       assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6791 // Load r2 into a virtual register and store it to the TOC save area.
6792 setUsesTOCBasePtr(DAG);
6793 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6794 // TOC save area offset.
6795 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6796 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6797 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6798       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6799                            MachinePointerInfo::getStack(
6800                                DAG.getMachineFunction(), TOCSaveOffset));
6801 }
6802 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6803 // This does not mean the MTCTR instruction must use R12; it's easier
6804 // to model this as an extra parameter, so do that.
6805 if (isELFv2ABI && !CFlags.IsPatchPoint)
6806 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6807 }
6808
6809 // Build a sequence of copy-to-reg nodes chained together with token chain
6810 // and flag operands which copy the outgoing args into the appropriate regs.
6811 SDValue InGlue;
6812 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6813 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6814 RegsToPass[i].second, InGlue);
6815 InGlue = Chain.getValue(1);
6816 }
6817
6818 if (CFlags.IsTailCall && !IsSibCall)
6819 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6820 TailCallArguments);
6821
6822 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6823 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6824}
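// Editorial sketch (not part of the lowering above): in the right-justified
// small-aggregate path, a Size-byte object with Size < 8 is copied to offset
// 8 - Size within its doubleword slot on big-endian targets, so loading the
// whole slot leaves the aggregate right-justified in the GPR. For a
// hypothetical 3-byte struct:
static_assert(8 - 3 == 5,
              "a 3-byte aggregate starts 5 bytes into its 8-byte slot (BE)");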
6825
6826// Returns true when the shadow of a general purpose argument register
6827// in the parameter save area is aligned to at least 'RequiredAlign'.
6828static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6829 assert(RequiredAlign.value() <= 16 &&
6830 "Required alignment greater than stack alignment.");
6831 switch (Reg) {
6832 default:
6833 report_fatal_error("called on invalid register.");
6834 case PPC::R5:
6835 case PPC::R9:
6836 case PPC::X3:
6837 case PPC::X5:
6838 case PPC::X7:
6839 case PPC::X9:
6840     // These registers are 16-byte aligned, which is the strictest alignment
6841     // we can support.
6842 return true;
6843 case PPC::R3:
6844 case PPC::R7:
6845 case PPC::X4:
6846 case PPC::X6:
6847 case PPC::X8:
6848 case PPC::X10:
6849 // The shadow of these registers in the PSA is 8 byte aligned.
6850 return RequiredAlign <= 8;
6851 case PPC::R4:
6852 case PPC::R6:
6853 case PPC::R8:
6854 case PPC::R10:
6855 return RequiredAlign <= 4;
6856 }
6857}
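// Editorial sketch of the arithmetic behind isGPRShadowAligned (illustrative
// only): assuming the 64-bit AIX linkage area of 48 bytes, the parameter save
// area starts 16-byte aligned, so the shadow of X3+N sits at 48 + 8*N. It is
// 16-byte aligned for even N (X3, X5, X7, X9) and only 8-byte aligned for odd
// N (X4, X6, X8, X10), which is exactly what the switch above encodes.
static_assert((48 + 8 * 0) % 16 == 0, "X3's PSA shadow is 16-byte aligned");
static_assert((48 + 8 * 1) % 16 == 8, "X4's PSA shadow is only 8-byte aligned");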
6858
6859static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6860 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6861 CCState &S) {
6862 AIXCCState &State = static_cast<AIXCCState &>(S);
6863   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6864       State.getMachineFunction().getSubtarget());
6865 const bool IsPPC64 = Subtarget.isPPC64();
6866 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6867 const Align PtrAlign(PtrSize);
6868 const Align StackAlign(16);
6869 const MVT RegVT = Subtarget.getScalarIntVT();
6870
6871 if (ValVT == MVT::f128)
6872 report_fatal_error("f128 is unimplemented on AIX.");
6873
6874 if (ArgFlags.isNest())
6875 report_fatal_error("Nest arguments are unimplemented.");
6876
6877 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6878 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6879 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6880 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6881 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6882 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6883
6884 static const MCPhysReg VR[] = {// Vector registers.
6885 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6886 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6887 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6888
6889 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6890
6891 if (ArgFlags.isByVal()) {
6892 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6893 if (ByValAlign > StackAlign)
6894 report_fatal_error("Pass-by-value arguments with alignment greater than "
6895 "16 are not supported.");
6896
6897 const unsigned ByValSize = ArgFlags.getByValSize();
6898 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6899
6900 // An empty aggregate parameter takes up no storage and no registers,
6901 // but needs a MemLoc for a stack slot for the formal arguments side.
6902     if (ByValSize == 0) {
6903       State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6904                                        State.getStackSize(), RegVT, LocInfo));
6905 return false;
6906 }
6907
6908 // Shadow allocate any registers that are not properly aligned.
6909 unsigned NextReg = State.getFirstUnallocated(GPRs);
6910 while (NextReg != GPRs.size() &&
6911 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6912       // Shadow allocate the next register since its alignment is not strict enough.
6913 MCRegister Reg = State.AllocateReg(GPRs);
6914 // Allocate the stack space shadowed by said register.
6915 State.AllocateStack(PtrSize, PtrAlign);
6916       assert(Reg && "Allocating register unexpectedly failed.");
6917 (void)Reg;
6918 NextReg = State.getFirstUnallocated(GPRs);
6919 }
6920
6921 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6922 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6923 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6924 if (MCRegister Reg = State.AllocateReg(GPRs))
6925 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6926       else {
6927         State.addLoc(
6928             CCValAssign::getMem(ValNo, ValVT, Offset, RegVT,
6929                                 LocInfo));
6930 break;
6931 }
6932 }
6933 return false;
6934 }
6935
6936 // Arguments always reserve parameter save area.
6937 switch (ValVT.SimpleTy) {
6938 default:
6939 report_fatal_error("Unhandled value type for argument.");
6940 case MVT::i64:
6941 // i64 arguments should have been split to i32 for PPC32.
6942 assert(IsPPC64 && "PPC32 should have split i64 values.");
6943 [[fallthrough]];
6944 case MVT::i1:
6945 case MVT::i32: {
6946 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6947 // AIX integer arguments are always passed in register width.
6948 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6949 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6951 if (MCRegister Reg = State.AllocateReg(GPRs))
6952 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6953 else
6954 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6955
6956 return false;
6957 }
6958 case MVT::f32:
6959 case MVT::f64: {
6960 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6961 const unsigned StoreSize = LocVT.getStoreSize();
6962 // Floats are always 4-byte aligned in the PSA on AIX.
6963 // This includes f64 in 64-bit mode for ABI compatibility.
6964 const unsigned Offset =
6965 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6966 MCRegister FReg = State.AllocateReg(FPR);
6967 if (FReg)
6968 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6969
6970 // Reserve and initialize GPRs or initialize the PSA as required.
6971 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6972 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6973 assert(FReg && "An FPR should be available when a GPR is reserved.");
6974 if (State.isVarArg()) {
6975 // Successfully reserved GPRs are only initialized for vararg calls.
6976 // Custom handling is required for:
6977 // f64 in PPC32 needs to be split into 2 GPRs.
6978 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6979 State.addLoc(
6980 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6981 }
6982 } else {
6983 // If there are insufficient GPRs, the PSA needs to be initialized.
6984 // Initialization occurs even if an FPR was initialized for
6985 // compatibility with the AIX XL compiler. The full memory for the
6986 // argument will be initialized even if a prior word is saved in GPR.
6987 // A custom memLoc is used when the argument also passes in FPR so
6988 // that the callee handling can skip over it easily.
6989 State.addLoc(
6990 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6991 LocInfo)
6992 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6993 break;
6994 }
6995 }
6996
6997 return false;
6998 }
6999 case MVT::v4f32:
7000 case MVT::v4i32:
7001 case MVT::v8i16:
7002 case MVT::v16i8:
7003 case MVT::v2i64:
7004 case MVT::v2f64:
7005 case MVT::v1i128: {
7006 const unsigned VecSize = 16;
7007 const Align VecAlign(VecSize);
7008
7009 if (!State.isVarArg()) {
7010 // If there are vector registers remaining we don't consume any stack
7011 // space.
7012 if (MCRegister VReg = State.AllocateReg(VR)) {
7013 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7014 return false;
7015 }
7016 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7017 // might be allocated in the portion of the PSA that is shadowed by the
7018 // GPRs.
7019 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7020 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7021 return false;
7022 }
7023
7024 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7025 // Burn any underaligned registers and their shadowed stack space until
7026 // we reach the required alignment.
7027 while (NextRegIndex != GPRs.size() &&
7028 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7029 // Shadow allocate register and its stack shadow.
7030 MCRegister Reg = State.AllocateReg(GPRs);
7031 State.AllocateStack(PtrSize, PtrAlign);
7032 assert(Reg && "Allocating register unexpectedly failed.");
7033 (void)Reg;
7034 NextRegIndex = State.getFirstUnallocated(GPRs);
7035 }
7036
7037 // Vectors that are passed as fixed arguments are handled differently.
7038     // They are passed in VRs if any are available (unlike arguments passed
7039     // through an ellipsis), and they shadow GPRs (unlike arguments to
7040     // non-vararg functions).
7041 if (State.isFixed(ValNo)) {
7042 if (MCRegister VReg = State.AllocateReg(VR)) {
7043 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7044 // Shadow allocate GPRs and stack space even though we pass in a VR.
7045 for (unsigned I = 0; I != VecSize; I += PtrSize)
7046 State.AllocateReg(GPRs);
7047 State.AllocateStack(VecSize, VecAlign);
7048 return false;
7049 }
7050 // No vector registers remain so pass on the stack.
7051 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7052 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7053 return false;
7054 }
7055
7056     // If all GPRs are consumed then we pass the argument fully on the stack.
7057 if (NextRegIndex == GPRs.size()) {
7058 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7059 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7060 return false;
7061 }
7062
7063 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7064 // half of the argument, and then need to pass the remaining half on the
7065 // stack.
7066 if (GPRs[NextRegIndex] == PPC::R9) {
7067 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7068 State.addLoc(
7069 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7070
7071 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7072 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7073 assert(FirstReg && SecondReg &&
7074 "Allocating R9 or R10 unexpectedly failed.");
7075 State.addLoc(
7076 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7077 State.addLoc(
7078 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7079 return false;
7080 }
7081
7082 // We have enough GPRs to fully pass the vector argument, and we have
7083 // already consumed any underaligned registers. Start with the custom
7084 // MemLoc and then the custom RegLocs.
7085 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7086 State.addLoc(
7087 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7088 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7089 const MCRegister Reg = State.AllocateReg(GPRs);
7090       assert(Reg && "Failed to allocate register for vararg vector argument");
7091 State.addLoc(
7092 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7093 }
7094 return false;
7095 }
7096 }
7097 return true;
7098}
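// Editorial sketch (not part of CC_AIX): a 16-byte vector argument shadows
// 16 / PtrSize GPRs, i.e. two GPRs in 64-bit mode and four in 32-bit mode.
// That is why, in the PPC32 corner case above where only R9 and R10 remain,
// just the first half of a vararg vector travels in registers while the whole
// vector also gets a parameter-save-area slot.
static_assert(16 / 8 == 2 && 16 / 4 == 4,
              "GPRs shadowed by one vector argument, 64-bit vs 32-bit");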
7099
7100// So far, this function is only used by LowerFormalArguments_AIX()
7101 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7102                                                     bool IsPPC64,
7103 bool HasP8Vector,
7104 bool HasVSX) {
7105 assert((IsPPC64 || SVT != MVT::i64) &&
7106 "i64 should have been split for 32-bit codegen.");
7107
7108 switch (SVT) {
7109 default:
7110 report_fatal_error("Unexpected value type for formal argument");
7111 case MVT::i1:
7112 case MVT::i32:
7113 case MVT::i64:
7114 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7115 case MVT::f32:
7116 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7117 case MVT::f64:
7118 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7119 case MVT::v4f32:
7120 case MVT::v4i32:
7121 case MVT::v8i16:
7122 case MVT::v16i8:
7123 case MVT::v2i64:
7124 case MVT::v2f64:
7125 case MVT::v1i128:
7126 return &PPC::VRRCRegClass;
7127 }
7128}
7129
7130 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7131                                         SelectionDAG &DAG, SDValue ArgValue,
7132 MVT LocVT, const SDLoc &dl) {
7133 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7134 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7135
7136 if (Flags.isSExt())
7137 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7138 DAG.getValueType(ValVT));
7139 else if (Flags.isZExt())
7140 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7141 DAG.getValueType(ValVT));
7142
7143 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7144}
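// Editorial note on the AssertSext/AssertZext nodes above (illustrative
// only): they record the assumption that the wider location already holds a
// properly extended narrow value, so the TRUNCATE emitted here is lossless.
// In plain C++ terms, with hypothetical values:
static_assert(static_cast<int32_t>(int64_t{-42}) == -42,
              "a sign-extended value survives truncation to the narrow type");
static_assert(static_cast<uint32_t>(uint64_t{42}) == 42u,
              "a zero-extended value survives truncation to the narrow type");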
7145
7146static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7147 const unsigned LASize = FL->getLinkageSize();
7148
7149 if (PPC::GPRCRegClass.contains(Reg)) {
7150 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7151 "Reg must be a valid argument register!");
7152 return LASize + 4 * (Reg - PPC::R3);
7153 }
7154
7155 if (PPC::G8RCRegClass.contains(Reg)) {
7156 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7157 "Reg must be a valid argument register!");
7158 return LASize + 8 * (Reg - PPC::X3);
7159 }
7160
7161 llvm_unreachable("Only general purpose registers expected.");
7162}
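// Editorial example of the mapping above (illustrative only): with the
// 64-bit AIX linkage area of 48 bytes, X5 is the third argument GPR, so its
// shadow in the parameter save area starts 48 + 8 * (5 - 3) = 64 bytes past
// the incoming stack pointer; the 32-bit equivalent for R5 is
// 24 + 4 * (5 - 3) = 32 bytes.
static_assert(48 + 8 * (5 - 3) == 64 && 24 + 4 * (5 - 3) == 32,
              "PSA offsets of X5 (64-bit) and R5 (32-bit) on AIX");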
7163
7164// AIX ABI Stack Frame Layout:
7165//
7166// Low Memory +--------------------------------------------+
7167// SP +---> | Back chain | ---+
7168// | +--------------------------------------------+ |
7169// | | Saved Condition Register | |
7170// | +--------------------------------------------+ |
7171// | | Saved Linkage Register | |
7172// | +--------------------------------------------+ | Linkage Area
7173// | | Reserved for compilers | |
7174// | +--------------------------------------------+ |
7175// | | Reserved for binders | |
7176// | +--------------------------------------------+ |
7177// | | Saved TOC pointer | ---+
7178// | +--------------------------------------------+
7179// | | Parameter save area |
7180// | +--------------------------------------------+
7181// | | Alloca space |
7182// | +--------------------------------------------+
7183// | | Local variable space |
7184// | +--------------------------------------------+
7185// | | Float/int conversion temporary |
7186// | +--------------------------------------------+
7187// | | Save area for AltiVec registers |
7188// | +--------------------------------------------+
7189// | | AltiVec alignment padding |
7190// | +--------------------------------------------+
7191// | | Save area for VRSAVE register |
7192// | +--------------------------------------------+
7193// | | Save area for General Purpose registers |
7194// | +--------------------------------------------+
7195// | | Save area for Floating Point registers |
7196// | +--------------------------------------------+
7197// +---- | Back chain |
7198// High Memory +--------------------------------------------+
7199//
7200// Specifications:
7201// AIX 7.2 Assembler Language Reference
7202// Subroutine linkage convention
7203
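// Editorial note on the linkage area pictured above (not from the source):
// it holds six pointer-sized slots (back chain, saved CR, saved LR, the two
// reserved words, and the saved TOC pointer), so it occupies 6 * 4 = 24 bytes
// on 32-bit AIX and 6 * 8 = 48 bytes on 64-bit AIX; the parameter save area
// begins immediately after it.
static_assert(6 * 4 == 24 && 6 * 8 == 48, "AIX linkage area sizes");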
7204SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7205 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7206 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7207 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7208
7209 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7210 CallConv == CallingConv::Fast) &&
7211 "Unexpected calling convention!");
7212
7213 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7214 report_fatal_error("Tail call support is unimplemented on AIX.");
7215
7216 if (useSoftFloat())
7217 report_fatal_error("Soft float support is unimplemented on AIX.");
7218
7219 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7220
7221 const bool IsPPC64 = Subtarget.isPPC64();
7222 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7223
7224   // Assign locations to all of the incoming arguments.
7225   SmallVector<CCValAssign, 16> ArgLocs;
7226   MachineFunction &MF = DAG.getMachineFunction();
7227 MachineFrameInfo &MFI = MF.getFrameInfo();
7228 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7229 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7230
7231 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7232 // Reserve space for the linkage area on the stack.
7233 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7234 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7235 uint64_t SaveStackPos = CCInfo.getStackSize();
7236 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7237 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7238
7239   SmallVector<SDValue, 8> MemOps;
7240
7241 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7242 CCValAssign &VA = ArgLocs[I++];
7243 MVT LocVT = VA.getLocVT();
7244 MVT ValVT = VA.getValVT();
7245 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7246
7247 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7248 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7249 // For compatibility with the AIX XL compiler, the float args in the
7250 // parameter save area are initialized even if the argument is available
7251 // in register. The caller is required to initialize both the register
7252     // and memory; however, the callee can choose to expect it in either.
7253 // The memloc is dismissed here because the argument is retrieved from
7254 // the register.
7255 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7256 continue;
7257
7258 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7259 const TargetRegisterClass *RegClass = getRegClassForSVT(
7260 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7261       // On PPC64, the debugger assumes extended values are stored from the full 8-byte GPR.
7262 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7263 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7264 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7265 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7266 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7267 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7268 MachinePointerInfo(), Align(PtrByteSize));
7269 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7270 MemOps.push_back(StoreReg);
7271 }
7272
7273 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7274 unsigned StoreSize =
7275 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7276 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7277 }
7278
7279 auto HandleMemLoc = [&]() {
7280 const unsigned LocSize = LocVT.getStoreSize();
7281 const unsigned ValSize = ValVT.getStoreSize();
7282 assert((ValSize <= LocSize) &&
7283 "Object size is larger than size of MemLoc");
7284 int CurArgOffset = VA.getLocMemOffset();
7285 // Objects are right-justified because AIX is big-endian.
7286 if (LocSize > ValSize)
7287 CurArgOffset += LocSize - ValSize;
7288 // Potential tail calls could cause overwriting of argument stack slots.
7289       const bool IsImmutable =
7290           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7291             (CallConv == CallingConv::Fast));
7292 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7293 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7294 SDValue ArgValue =
7295 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7296
7297 // While the ABI specifies the argument type is (sign or zero) extended
7298 // out to register width, not all code is compliant. We truncate and
7299 // re-extend to be more forgiving of these callers when the argument type
7300 // is smaller than register width.
7301 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7302 ValVT.isInteger() &&
7303 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7304 SDValue ArgValueTrunc = DAG.getNode(
7305 ISD::TRUNCATE, dl, ArgVT.getSimpleVT() == MVT::i1 ? MVT::i8 : ArgVT,
7306 ArgValue);
7307 SDValue ArgValueExt =
7308 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7309 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7310 InVals.push_back(ArgValueExt);
7311 } else {
7312 InVals.push_back(ArgValue);
7313 }
7314 };
7315
7316 // Vector arguments to VaArg functions are passed both on the stack, and
7317 // in any available GPRs. Load the value from the stack and add the GPRs
7318 // as live ins.
7319 if (VA.isMemLoc() && VA.needsCustom()) {
7320 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7321 assert(isVarArg && "Only use custom memloc for vararg.");
7322       // Record the ValNo of the custom MemLoc so we can compare it to the
7323       // ValNo of the matching custom RegLocs that follow.
7324 const unsigned OriginalValNo = VA.getValNo();
7325 (void)OriginalValNo;
7326
7327 auto HandleCustomVecRegLoc = [&]() {
7328 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7329 "Missing custom RegLoc.");
7330 VA = ArgLocs[I++];
7331 assert(VA.getValVT().isVector() &&
7332 "Unexpected Val type for custom RegLoc.");
7333 assert(VA.getValNo() == OriginalValNo &&
7334 "ValNo mismatch between custom MemLoc and RegLoc.");
7335         MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7336         MF.addLiveIn(VA.getLocReg(),
7337 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7338 Subtarget.hasVSX()));
7339 };
7340
7341 HandleMemLoc();
7342       // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7343       // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7344 // R10.
7345 HandleCustomVecRegLoc();
7346 HandleCustomVecRegLoc();
7347
7348 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7349 // we passed the vector in R5, R6, R7 and R8.
7350 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7351 assert(!IsPPC64 &&
7352 "Only 2 custom RegLocs expected for 64-bit codegen.");
7353 HandleCustomVecRegLoc();
7354 HandleCustomVecRegLoc();
7355 }
7356
7357 continue;
7358 }
7359
7360 if (VA.isRegLoc()) {
7361       if (VA.getValVT().isScalarInteger())
7362         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7363 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7364 switch (VA.getValVT().SimpleTy) {
7365 default:
7366 report_fatal_error("Unhandled value type for argument.");
7367         case MVT::f32:
7368           FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7369           break;
7370         case MVT::f64:
7371           FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7372           break;
7373 }
7374 } else if (VA.getValVT().isVector()) {
7375 switch (VA.getValVT().SimpleTy) {
7376 default:
7377 report_fatal_error("Unhandled value type for argument.");
7378         case MVT::v16i8:
7379           FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7380           break;
7381         case MVT::v8i16:
7382           FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7383           break;
7384         case MVT::v4i32:
7385         case MVT::v2i64:
7386         case MVT::v1i128:
7387           FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7388           break;
7389         case MVT::v4f32:
7390         case MVT::v2f64:
7391           FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7392           break;
7393 }
7394 }
7395 }
7396
7397 if (Flags.isByVal() && VA.isMemLoc()) {
7398 const unsigned Size =
7399 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7400 PtrByteSize);
7401 const int FI = MF.getFrameInfo().CreateFixedObject(
7402 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7403 /* IsAliased */ true);
7404 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7405 InVals.push_back(FIN);
7406
7407 continue;
7408 }
7409
7410 if (Flags.isByVal()) {
7411 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7412
7413 const MCPhysReg ArgReg = VA.getLocReg();
7414 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7415
7416 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7417 const int FI = MF.getFrameInfo().CreateFixedObject(
7418 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7419 /* IsAliased */ true);
7420 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7421 InVals.push_back(FIN);
7422
7423 // Add live ins for all the RegLocs for the same ByVal.
7424 const TargetRegisterClass *RegClass =
7425 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7426
7427 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7428 unsigned Offset) {
7429 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7430         // Since the caller's side has left-justified the aggregate in the
7431 // register, we can simply store the entire register into the stack
7432 // slot.
7433 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7434         // The store to the fixedstack object is needed because accessing a
7435 // field of the ByVal will use a gep and load. Ideally we will optimize
7436 // to extracting the value from the register directly, and elide the
7437         // stores when the argument's address is not taken, but that will need to
7438 // be future work.
7439 SDValue Store = DAG.getStore(
7440             CopyFrom.getValue(1), dl, CopyFrom,
7441             DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7442             MachinePointerInfo::getFixedStack(MF, FI, Offset));
7443
7444 MemOps.push_back(Store);
7445 };
7446
7447 unsigned Offset = 0;
7448 HandleRegLoc(VA.getLocReg(), Offset);
7449 Offset += PtrByteSize;
7450 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7451 Offset += PtrByteSize) {
7452 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7453 "RegLocs should be for ByVal argument.");
7454
7455 const CCValAssign RL = ArgLocs[I++];
7456 HandleRegLoc(RL.getLocReg(), Offset);
7458 }
7459
7460 if (Offset != StackSize) {
7461 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7462 "Expected MemLoc for remaining bytes.");
7463 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7464       // Consume the MemLoc. The InVal has already been emitted, so nothing
7465 // more needs to be done.
7466 ++I;
7467 }
7468
7469 continue;
7470 }
7471
7472 if (VA.isRegLoc() && !VA.needsCustom()) {
7473 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7474 Register VReg =
7475 MF.addLiveIn(VA.getLocReg(),
7476 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7477 Subtarget.hasVSX()));
7478 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7479 if (ValVT.isScalarInteger() &&
7480 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7481 ArgValue =
7482 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7483 }
7484 InVals.push_back(ArgValue);
7485 continue;
7486 }
7487 if (VA.isMemLoc()) {
7488 HandleMemLoc();
7489 continue;
7490 }
7491 }
7492
7493 // On AIX a minimum of 8 words is saved to the parameter save area.
7494 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7495 // Area that is at least reserved in the caller of this function.
7496 unsigned CallerReservedArea = std::max<unsigned>(
7497 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7498
7499 // Set the size that is at least reserved in caller of this function. Tail
7500 // call optimized function's reserved stack space needs to be aligned so
7501 // that taking the difference between two stack areas will result in an
7502 // aligned stack.
7503 CallerReservedArea =
7504 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7505 FuncInfo->setMinReservedArea(CallerReservedArea);
7506
7507 if (isVarArg) {
7508 FuncInfo->setVarArgsFrameIndex(
7509 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7510 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7511
7512 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7513 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7514
7515 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7516 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7517 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7518
7519 // The fixed integer arguments of a variadic function are stored to the
7520 // VarArgsFrameIndex on the stack so that they may be loaded by
7521 // dereferencing the result of va_next.
7522 for (unsigned GPRIndex =
7523 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7524 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7525
7526 const Register VReg =
7527 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7528 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7529
7530 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7531 SDValue Store =
7532 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7533 MemOps.push_back(Store);
7534 // Increment the address for the next argument to store.
7535 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7536 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7537 }
7538 }
7539
7540 if (!MemOps.empty())
7541 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7542
7543 return Chain;
7544}
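// Editorial sketch (illustrative only): the minimum parameter save area
// reserved above covers the eight argument GPRs, i.e. 8 * 4 = 32 bytes on
// 32-bit AIX and 8 * 8 = 64 bytes on 64-bit AIX, on top of the linkage area.
static_assert(8 * 4 == 32 && 8 * 8 == 64,
              "minimum AIX parameter save area, 32-bit vs 64-bit");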
7545
7546SDValue PPCTargetLowering::LowerCall_AIX(
7547 SDValue Chain, SDValue Callee, CallFlags CFlags,
7548     const SmallVectorImpl<ISD::OutputArg> &Outs,
7549     const SmallVectorImpl<SDValue> &OutVals,
7550     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7551     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7552     const CallBase *CB) const {
7553 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7554 // AIX ABI stack frame layout.
7555
7556 assert((CFlags.CallConv == CallingConv::C ||
7557 CFlags.CallConv == CallingConv::Cold ||
7558 CFlags.CallConv == CallingConv::Fast) &&
7559 "Unexpected calling convention!");
7560
7561 if (CFlags.IsPatchPoint)
7562 report_fatal_error("This call type is unimplemented on AIX.");
7563
7564 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7565   const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7566   MachineFunction &MF = DAG.getMachineFunction();
7567   SmallVector<CCValAssign, 16> ArgLocs;
7568 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7569 *DAG.getContext());
7570
7571 // Reserve space for the linkage save area (LSA) on the stack.
7572 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7573 // [SP][CR][LR][2 x reserved][TOC].
7574 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7575 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7576 const bool IsPPC64 = Subtarget.isPPC64();
7577 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7578 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7579 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7580 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7581
7582 // The prolog code of the callee may store up to 8 GPR argument registers to
7583 // the stack, allowing va_start to index over them in memory if the callee
7584 // is variadic.
7585 // Because we cannot tell if this is needed on the caller side, we have to
7586 // conservatively assume that it is needed. As such, make sure we have at
7587 // least enough stack space for the caller to store the 8 GPRs.
7588 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7589 const unsigned NumBytes = std::max<unsigned>(
7590 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7591
7592 // Adjust the stack pointer for the new arguments...
7593 // These operations are automatically eliminated by the prolog/epilog pass.
7594 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7595 SDValue CallSeqStart = Chain;
7596
7597   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7598 SmallVector<SDValue, 8> MemOpChains;
7599
7600 // Set up a copy of the stack pointer for loading and storing any
7601 // arguments that may not fit in the registers available for argument
7602 // passing.
7603 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7604 : DAG.getRegister(PPC::R1, MVT::i32);
7605
7606 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7607 const unsigned ValNo = ArgLocs[I].getValNo();
7608 SDValue Arg = OutVals[ValNo];
7609 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7610
7611 if (Flags.isByVal()) {
7612 const unsigned ByValSize = Flags.getByValSize();
7613
7614 // Nothing to do for zero-sized ByVals on the caller side.
7615 if (!ByValSize) {
7616 ++I;
7617 continue;
7618 }
7619
7620 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7621 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7622 (LoadOffset != 0)
7623 ? DAG.getObjectPtrOffset(
7624 dl, Arg, TypeSize::getFixed(LoadOffset))
7625 : Arg,
7626 MachinePointerInfo(), VT);
7627 };
7628
7629 unsigned LoadOffset = 0;
7630
7631       // Initialize registers that are fully occupied by the by-val argument.
7632 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7633 SDValue Load = GetLoad(PtrVT, LoadOffset);
7634 MemOpChains.push_back(Load.getValue(1));
7635 LoadOffset += PtrByteSize;
7636 const CCValAssign &ByValVA = ArgLocs[I++];
7637 assert(ByValVA.getValNo() == ValNo &&
7638 "Unexpected location for pass-by-value argument.");
7639 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7640 }
7641
7642 if (LoadOffset == ByValSize)
7643 continue;
7644
7645 // There must be one more loc to handle the remainder.
7646 assert(ArgLocs[I].getValNo() == ValNo &&
7647 "Expected additional location for by-value argument.");
7648
7649 if (ArgLocs[I].isMemLoc()) {
7650 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7651 const CCValAssign &ByValVA = ArgLocs[I++];
7652 ISD::ArgFlagsTy MemcpyFlags = Flags;
7653 // Only memcpy the bytes that don't pass in register.
7654 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7655 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7656 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7657 dl, Arg, TypeSize::getFixed(LoadOffset))
7658                               : Arg,
7659             DAG.getObjectPtrOffset(
7660                 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7661 CallSeqStart, MemcpyFlags, DAG, dl);
7662 continue;
7663 }
7664
7665 // Initialize the final register residue.
7666 // Any residue that occupies the final by-val arg register must be
7667 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7668 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7669 // 2 and 1 byte loads.
7670 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7671 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7672 "Unexpected register residue for by-value argument.");
7673 SDValue ResidueVal;
7674 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7675 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7676 const MVT VT =
7677 N == 1 ? MVT::i8
7678 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7679 SDValue Load = GetLoad(VT, LoadOffset);
7680 MemOpChains.push_back(Load.getValue(1));
7681 LoadOffset += N;
7682 Bytes += N;
7683
7684       // By-val arguments are passed left-justified in registers.
7685 // Every load here needs to be shifted, otherwise a full register load
7686 // should have been used.
7687 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7688 "Unexpected load emitted during handling of pass-by-value "
7689 "argument.");
7690 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7691 EVT ShiftAmountTy =
7692 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7693 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7694 SDValue ShiftedLoad =
7695 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7696 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7697 ShiftedLoad)
7698 : ShiftedLoad;
7699 }
7700
7701 const CCValAssign &ByValVA = ArgLocs[I++];
7702 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7703 continue;
7704 }
7705
7706 CCValAssign &VA = ArgLocs[I++];
7707 const MVT LocVT = VA.getLocVT();
7708 const MVT ValVT = VA.getValVT();
7709
7710 switch (VA.getLocInfo()) {
7711 default:
7712 report_fatal_error("Unexpected argument extension type.");
7713 case CCValAssign::Full:
7714 break;
7715 case CCValAssign::ZExt:
7716 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7717 break;
7718 case CCValAssign::SExt:
7719 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7720 break;
7721 }
7722
7723 if (VA.isRegLoc() && !VA.needsCustom()) {
7724 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7725 continue;
7726 }
7727
7728 // Vector arguments passed to VarArg functions need custom handling when
7729 // they are passed (at least partially) in GPRs.
7730 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7731 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7732 // Store value to its stack slot.
7733 SDValue PtrOff =
7734 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7735 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7736 SDValue Store =
7737 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7738 MemOpChains.push_back(Store);
7739 const unsigned OriginalValNo = VA.getValNo();
7740 // Then load the GPRs from the stack
7741 unsigned LoadOffset = 0;
7742 auto HandleCustomVecRegLoc = [&]() {
7743 assert(I != E && "Unexpected end of CCvalAssigns.");
7744 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7745 "Expected custom RegLoc.");
7746 CCValAssign RegVA = ArgLocs[I++];
7747 assert(RegVA.getValNo() == OriginalValNo &&
7748 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7749 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7750 DAG.getConstant(LoadOffset, dl, PtrVT));
7751 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7752 MemOpChains.push_back(Load.getValue(1));
7753 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7754 LoadOffset += PtrByteSize;
7755 };
7756
7757       // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7758       // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7759 // R10.
7760 HandleCustomVecRegLoc();
7761 HandleCustomVecRegLoc();
7762
7763 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7764 ArgLocs[I].getValNo() == OriginalValNo) {
7765 assert(!IsPPC64 &&
7766 "Only 2 custom RegLocs expected for 64-bit codegen.");
7767 HandleCustomVecRegLoc();
7768 HandleCustomVecRegLoc();
7769 }
7770
7771 continue;
7772 }
7773
7774 if (VA.isMemLoc()) {
7775 SDValue PtrOff =
7776 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7777 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7778 MemOpChains.push_back(
7779 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7780
7781 continue;
7782 }
7783
7784     if (!ValVT.isFloatingPoint())
7785       report_fatal_error(
7786           "Unexpected register handling for calling convention.");
7787
7788 // Custom handling is used for GPR initializations for vararg float
7789 // arguments.
7790 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7791 LocVT.isInteger() &&
7792 "Custom register handling only expected for VarArg.");
7793
7794 SDValue ArgAsInt =
7795 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7796
7797 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7798 // f32 in 32-bit GPR
7799 // f64 in 64-bit GPR
7800 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7801 else if (Arg.getValueType().getFixedSizeInBits() <
7802 LocVT.getFixedSizeInBits())
7803 // f32 in 64-bit GPR.
7804 RegsToPass.push_back(std::make_pair(
7805 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7806 else {
7807 // f64 in two 32-bit GPRs
7808 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7809 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7810 "Unexpected custom register for argument!");
7811 CCValAssign &GPR1 = VA;
7812 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7813 DAG.getConstant(32, dl, MVT::i8));
7814 RegsToPass.push_back(std::make_pair(
7815 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7816
7817 if (I != E) {
7818 // If only 1 GPR was available, there will only be one custom GPR and
7819 // the argument will also pass in memory.
7820 CCValAssign &PeekArg = ArgLocs[I];
7821         if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7822 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7823 CCValAssign &GPR2 = ArgLocs[I++];
7824 RegsToPass.push_back(std::make_pair(
7825 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7826 }
7827 }
7828 }
7829 }
7830
7831 if (!MemOpChains.empty())
7832 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7833
7834 // For indirect calls, we need to save the TOC base to the stack for
7835 // restoration after the call.
7836 if (CFlags.IsIndirect) {
7837 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7838 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7839 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7840 const MVT PtrVT = Subtarget.getScalarIntVT();
7841 const unsigned TOCSaveOffset =
7842 Subtarget.getFrameLowering()->getTOCSaveOffset();
7843
7844 setUsesTOCBasePtr(DAG);
7845 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7846 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7847 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7848 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7849 Chain = DAG.getStore(
7850 Val.getValue(1), dl, Val, AddPtr,
7851 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7852 }
7853
7854 // Build a sequence of copy-to-reg nodes chained together with token chain
7855 // and flag operands which copy the outgoing args into the appropriate regs.
7856 SDValue InGlue;
7857 for (auto Reg : RegsToPass) {
7858 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7859 InGlue = Chain.getValue(1);
7860 }
7861
7862 const int SPDiff = 0;
7863 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7864 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7865}
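// Editorial sketch of the by-val residue handling above (not part of the
// lowering): on 64-bit AIX a hypothetical 7-byte residue is loaded as 4-, 2-
// and 1-byte pieces, and each piece is shifted left by 64 - 8 * BytesSoFar so
// that the aggregate ends up left-justified in the register.
static_assert(4 + 2 + 1 == 7, "power-of-two pieces covering a 7-byte residue");
static_assert(64 - 8 * 4 == 32 && 64 - 8 * 6 == 16 && 64 - 8 * 7 == 8,
              "left-shift amounts applied to the three residue loads");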
7866
7867bool
7868PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7869                                   MachineFunction &MF, bool isVarArg,
7870                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7871                                   LLVMContext &Context,
7872                                   const Type *RetTy) const {
7873   SmallVector<CCValAssign, 16> RVLocs;
7874   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7875 return CCInfo.CheckReturn(
7876       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7877                 ? RetCC_PPC_Cold
7878                 : RetCC_PPC);
7879}
7880
7881SDValue
7882PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7883                                bool isVarArg,
7884                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7885                                const SmallVectorImpl<SDValue> &OutVals,
7886                                const SDLoc &dl, SelectionDAG &DAG) const {
7887   SmallVector<CCValAssign, 16> RVLocs;
7888   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7889 *DAG.getContext());
7890 CCInfo.AnalyzeReturn(Outs,
7891                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7892                            ? RetCC_PPC_Cold
7893                            : RetCC_PPC);
7894
7895 SDValue Glue;
7896 SmallVector<SDValue, 4> RetOps(1, Chain);
7897
7898 // Copy the result values into the output registers.
7899 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7900 CCValAssign &VA = RVLocs[i];
7901 assert(VA.isRegLoc() && "Can only return in registers!");
7902
7903 SDValue Arg = OutVals[RealResIdx];
7904
7905 switch (VA.getLocInfo()) {
7906 default: llvm_unreachable("Unknown loc info!");
7907 case CCValAssign::Full: break;
7908 case CCValAssign::AExt:
7909 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7910 break;
7911 case CCValAssign::ZExt:
7912 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7913 break;
7914 case CCValAssign::SExt:
7915 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7916 break;
7917 }
7918 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7919 bool isLittleEndian = Subtarget.isLittleEndian();
7920 // Legalize ret f64 -> ret 2 x i32.
7921 SDValue SVal =
7922 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7923 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7924 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7925 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7926 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7927 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7928 Glue = Chain.getValue(1);
7929 VA = RVLocs[++i]; // skip ahead to next loc
7930 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7931 } else
7932 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7933 Glue = Chain.getValue(1);
7934 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7935 }
7936
7937 RetOps[0] = Chain; // Update chain.
7938
7939 // Add the glue if we have it.
7940 if (Glue.getNode())
7941 RetOps.push_back(Glue);
7942
7943 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7944}
7945
7946SDValue
7947PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7948 SelectionDAG &DAG) const {
7949 SDLoc dl(Op);
7950
7951 // Get the correct type for integers.
7952 EVT IntVT = Op.getValueType();
7953
7954 // Get the inputs.
7955 SDValue Chain = Op.getOperand(0);
7956 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7957 // Build a DYNAREAOFFSET node.
7958 SDValue Ops[2] = {Chain, FPSIdx};
7959 SDVTList VTs = DAG.getVTList(IntVT);
7960 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7961}
7962
7963SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7964 SelectionDAG &DAG) const {
7965 // When we pop the dynamic allocation we need to restore the SP link.
7966 SDLoc dl(Op);
7967
7968 // Get the correct type for pointers.
7969 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7970
7971 // Construct the stack pointer operand.
7972 bool isPPC64 = Subtarget.isPPC64();
7973 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7974 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7975
7976 // Get the operands for the STACKRESTORE.
7977 SDValue Chain = Op.getOperand(0);
7978 SDValue SaveSP = Op.getOperand(1);
7979
7980 // Load the old link SP.
7981 SDValue LoadLinkSP =
7982 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7983
7984 // Restore the stack pointer.
7985 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7986
7987 // Store the old link SP.
7988 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7989}
7990
7991SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7992   MachineFunction &MF = DAG.getMachineFunction();
7993   bool isPPC64 = Subtarget.isPPC64();
7994 EVT PtrVT = getPointerTy(MF.getDataLayout());
7995
7996   // Get the current return address (LR) save index. If it has not been
7997   // created yet, allocate a fixed object for the LR save slot below.
7998   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7999 int RASI = FI->getReturnAddrSaveIndex();
8000
8001   // If the return address save index hasn't been defined yet.
8002   if (!RASI) {
8003     // Find out what the fixed offset of the LR save area is.
8004     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8005     // Allocate the frame index for the return address save area.
8006 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
8007 // Save the result.
8008 FI->setReturnAddrSaveIndex(RASI);
8009 }
8010 return DAG.getFrameIndex(RASI, PtrVT);
8011}
8012
8013SDValue
8014PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8015   MachineFunction &MF = DAG.getMachineFunction();
8016   bool isPPC64 = Subtarget.isPPC64();
8017 EVT PtrVT = getPointerTy(MF.getDataLayout());
8018
8019 // Get current frame pointer save index. The users of this index will be
8020 // primarily DYNALLOC instructions.
8021   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8022   int FPSI = FI->getFramePointerSaveIndex();
8023
8024 // If the frame pointer save index hasn't been defined yet.
8025 if (!FPSI) {
8026 // Find out what the fix offset of the frame pointer save area.
8027 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8028 // Allocate the frame index for frame pointer save area.
8029 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8030 // Save the result.
8031 FI->setFramePointerSaveIndex(FPSI);
8032 }
8033 return DAG.getFrameIndex(FPSI, PtrVT);
8034}
8035
8036SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8037 SelectionDAG &DAG) const {
8038   MachineFunction &MF = DAG.getMachineFunction();
8039   // Get the inputs.
8040 SDValue Chain = Op.getOperand(0);
8041 SDValue Size = Op.getOperand(1);
8042 SDLoc dl(Op);
8043
8044 // Get the correct type for pointers.
8045 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8046 // Negate the size.
8047 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8048 DAG.getConstant(0, dl, PtrVT), Size);
8049 // Construct a node for the frame pointer save index.
8050 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8051 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8052 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8053 if (hasInlineStackProbe(MF))
8054 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8055 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8056}
8057
8058SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8059 SelectionDAG &DAG) const {
8060   MachineFunction &MF = DAG.getMachineFunction();
8061
8062 bool isPPC64 = Subtarget.isPPC64();
8063 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8064
8065 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8066 return DAG.getFrameIndex(FI, PtrVT);
8067}
8068
8069SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8070 SelectionDAG &DAG) const {
8071 SDLoc DL(Op);
8072 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8073 DAG.getVTList(MVT::i32, MVT::Other),
8074 Op.getOperand(0), Op.getOperand(1));
8075}
8076
8077SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8078 SelectionDAG &DAG) const {
8079 SDLoc DL(Op);
8080 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8081 Op.getOperand(0), Op.getOperand(1));
8082}
8083
8084SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8085 if (Op.getValueType().isVector())
8086 return LowerVectorLoad(Op, DAG);
8087
8088 assert(Op.getValueType() == MVT::i1 &&
8089 "Custom lowering only for i1 loads");
8090
8091 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8092
8093 SDLoc dl(Op);
8094 LoadSDNode *LD = cast<LoadSDNode>(Op);
8095
8096 SDValue Chain = LD->getChain();
8097 SDValue BasePtr = LD->getBasePtr();
8098 MachineMemOperand *MMO = LD->getMemOperand();
8099
8100 SDValue NewLD =
8101 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8102 BasePtr, MVT::i8, MMO);
8103 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8104
8105 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8106 return DAG.getMergeValues(Ops, dl);
8107}
8108
8109SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8110 if (Op.getOperand(1).getValueType().isVector())
8111 return LowerVectorStore(Op, DAG);
8112
8113 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8114 "Custom lowering only for i1 stores");
8115
8116 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8117
8118 SDLoc dl(Op);
8119 StoreSDNode *ST = cast<StoreSDNode>(Op);
8120
8121 SDValue Chain = ST->getChain();
8122 SDValue BasePtr = ST->getBasePtr();
8123 SDValue Value = ST->getValue();
8124 MachineMemOperand *MMO = ST->getMemOperand();
8125
8126 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8127 Value);
8128 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8129}
8130
8131// FIXME: Remove this once the ANDI glue bug is fixed:
8132SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8133 assert(Op.getValueType() == MVT::i1 &&
8134 "Custom lowering only for i1 results");
8135
8136 SDLoc DL(Op);
8137 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8138}
8139
8140SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8141 SelectionDAG &DAG) const {
8142
8143 // Implements a vector truncate that fits in a vector register as a shuffle.
8144 // We want to legalize vector truncates down to where the source fits in
8145 // a vector register (and target is therefore smaller than vector register
8146 // size). At that point legalization will try to custom lower the sub-legal
8147 // result and get here - where we can contain the truncate as a single target
8148 // operation.
8149
8150 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8151 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8152 //
8153 // We will implement it for big-endian ordering as this (where x denotes
8154 // undefined):
8155 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8156 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8157 //
8158 // The same operation in little-endian ordering will be:
8159 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8160 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
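 // For example (illustrative, not from the original comment): for a
 // <4 x i32> -> <4 x i16> truncate, SizeMult is 128/64 = 2 and WideVT is
 // v8i16, so the little-endian mask built below is <0, 2, 4, 6, u, u, u, u>
 // (the low halfword of each word) and the big-endian mask is
 // <1, 3, 5, 7, u, u, u, u>.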
8161
8162 EVT TrgVT = Op.getValueType();
8163 assert(TrgVT.isVector() && "Vector type expected.");
8164 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8165 EVT EltVT = TrgVT.getVectorElementType();
8166 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8167 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8168 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8169 return SDValue();
8170
8171 SDValue N1 = Op.getOperand(0);
8172 EVT SrcVT = N1.getValueType();
8173 unsigned SrcSize = SrcVT.getSizeInBits();
8174 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8175 !llvm::has_single_bit<uint32_t>(
8176 SrcVT.getVectorElementType().getSizeInBits()))
8177 return SDValue();
8178 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8179 return SDValue();
8180
8181 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8182 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8183
8184 SDLoc DL(Op);
8185 SDValue Op1, Op2;
8186 if (SrcSize == 256) {
8187 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8188 EVT SplitVT =
8189 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8190 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8191 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8192 DAG.getConstant(0, DL, VecIdxTy));
8193 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8194 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8195 }
8196 else {
8197 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8198 Op2 = DAG.getUNDEF(WideVT);
8199 }
8200
8201 // First list the elements we want to keep.
8202 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8203 SmallVector<int, 16> ShuffV;
8204 if (Subtarget.isLittleEndian())
8205 for (unsigned i = 0; i < TrgNumElts; ++i)
8206 ShuffV.push_back(i * SizeMult);
8207 else
8208 for (unsigned i = 1; i <= TrgNumElts; ++i)
8209 ShuffV.push_back(i * SizeMult - 1);
8210
8211 // Populate the remaining elements with undefs.
8212 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8213 // ShuffV.push_back(i + WideNumElts);
8214 ShuffV.push_back(WideNumElts + 1);
8215
8216 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8217 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8218 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8219}
8220
8221/// LowerSELECT_CC - Lower floating point select_cc's into the fsel instruction
8222/// when possible.
8223SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8224 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8225 EVT ResVT = Op.getValueType();
8226 EVT CmpVT = Op.getOperand(0).getValueType();
8227 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8228 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8229 SDLoc dl(Op);
8230
8231 // Without power9-vector, we don't have a native instruction for f128 comparison.
8232 // The following transformation to a libcall-based setcc is needed:
8233 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8234 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8235 SDValue Z = DAG.getSetCC(
8236 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8237 LHS, RHS, CC);
8238 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8239 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8240 }
8241
8242 // Not FP, or using SPE? Not a fsel.
8243 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8244 Subtarget.hasSPE())
8245 return Op;
8246
8247 SDNodeFlags Flags = Op.getNode()->getFlags();
8248
8249 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8250 // presence of infinities.
8251 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8252 switch (CC) {
8253 default:
8254 break;
8255 case ISD::SETOGT:
8256 case ISD::SETGT:
8257 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8258 case ISD::SETOLT:
8259 case ISD::SETLT:
8260 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8261 }
8262 }
8263
8264 // We might be able to do better than this under some circumstances, but in
8265 // general, fsel-based lowering of select is a finite-math-only optimization.
8266 // For more information, see section F.3 of the 2.06 ISA specification.
8267 // With ISA 3.0
8268 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8269 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8270 ResVT == MVT::f128)
8271 return Op;
8272
8273 // If the RHS of the comparison is a 0.0, we don't need to do the
8274 // subtraction at all.
8275 SDValue Sel1;
8276 if (isFloatingPointZero(RHS))
8277 switch (CC) {
8278 default: break; // SETUO etc aren't handled by fsel.
8279 case ISD::SETNE:
8280 std::swap(TV, FV);
8281 [[fallthrough]];
8282 case ISD::SETEQ:
8283 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8284 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8285 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8286 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8287 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8288 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8289 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8290 case ISD::SETULT:
8291 case ISD::SETLT:
8292 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8293 [[fallthrough]];
8294 case ISD::SETOGE:
8295 case ISD::SETGE:
8296 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8297 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8298 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8299 case ISD::SETUGT:
8300 case ISD::SETGT:
8301 std::swap(TV, FV); // fsel is natively setge; swap operands to handle setgt
8302 [[fallthrough]];
8303 case ISD::SETOLE:
8304 case ISD::SETLE:
8305 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8306 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8307 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8308 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8309 }
8310
8311 SDValue Cmp;
8312 switch (CC) {
8313 default: break; // SETUO etc aren't handled by fsel.
8314 case ISD::SETNE:
8315 std::swap(TV, FV);
8316 [[fallthrough]];
8317 case ISD::SETEQ:
8318 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8319 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8320 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8321 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8322 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8323 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8324 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8325 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8326 case ISD::SETULT:
8327 case ISD::SETLT:
8328 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8329 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8330 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8331 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8332 case ISD::SETOGE:
8333 case ISD::SETGE:
8334 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8335 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8336 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8337 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8338 case ISD::SETUGT:
8339 case ISD::SETGT:
8340 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8341 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8342 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8343 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8344 case ISD::SETOLE:
8345 case ISD::SETLE:
8346 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8347 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8348 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8349 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8350 }
8351 return Op;
8352}
8353
8354static unsigned getPPCStrictOpcode(unsigned Opc) {
8355 switch (Opc) {
8356 default:
8357 llvm_unreachable("No strict version of this opcode!");
8358 case PPCISD::FCTIDZ:
8359 return PPCISD::STRICT_FCTIDZ;
8360 case PPCISD::FCTIWZ:
8361 return PPCISD::STRICT_FCTIWZ;
8362 case PPCISD::FCTIDUZ:
8363 return PPCISD::STRICT_FCTIDUZ;
8364 case PPCISD::FCTIWUZ:
8365 return PPCISD::STRICT_FCTIWUZ;
8366 case PPCISD::FCFID:
8367 return PPCISD::STRICT_FCFID;
8368 case PPCISD::FCFIDU:
8369 return PPCISD::STRICT_FCFIDU;
8370 case PPCISD::FCFIDS:
8371 return PPCISD::STRICT_FCFIDS;
8372 case PPCISD::FCFIDUS:
8373 return PPCISD::STRICT_FCFIDUS;
8374 }
8375}
8376
8377static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8378 const PPCSubtarget &Subtarget) {
8379 SDLoc dl(Op);
8380 bool IsStrict = Op->isStrictFPOpcode();
8381 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8382 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8383
8384 // TODO: Any other flags to propagate?
8385 SDNodeFlags Flags;
8386 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8387
8388 // For strict nodes, source is the second operand.
8389 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8390 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8391 MVT DestTy = Op.getSimpleValueType();
8392 assert(Src.getValueType().isFloatingPoint() &&
8393 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8394 DestTy == MVT::i64) &&
8395 "Invalid FP_TO_INT types");
8396 if (Src.getValueType() == MVT::f32) {
8397 if (IsStrict) {
8398 Src =
8399 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8400 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8401 Chain = Src.getValue(1);
8402 } else
8403 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8404 }
8405 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8406 DestTy = Subtarget.getScalarIntVT();
8407 unsigned Opc = ISD::DELETED_NODE;
8408 switch (DestTy.SimpleTy) {
8409 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8410 case MVT::i32:
8411 Opc = IsSigned ? PPCISD::FCTIWZ
8412 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8413 break;
8414 case MVT::i64:
8415 assert((IsSigned || Subtarget.hasFPCVT()) &&
8416 "i64 FP_TO_UINT is supported only with FPCVT");
8417 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8418 }
8419 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8420 SDValue Conv;
8421 if (IsStrict) {
8422 Opc = getPPCStrictOpcode(Opc);
8423 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8424 Flags);
8425 } else {
8426 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8427 }
8428 return Conv;
8429}
8430
8431void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8432 SelectionDAG &DAG,
8433 const SDLoc &dl) const {
8434 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8435 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8436 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8437 bool IsStrict = Op->isStrictFPOpcode();
8438
8439 // Convert the FP value to an int value through memory.
8440 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8441 (IsSigned || Subtarget.hasFPCVT());
8442 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8443 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8444 MachinePointerInfo MPI =
8445 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8446
8447 // Emit a store to the stack slot.
8448 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8449 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8450 if (i32Stack) {
8451 MachineFunction &MF = DAG.getMachineFunction();
8452 Alignment = Align(4);
8453 MachineMemOperand *MMO =
8454 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8455 SDValue Ops[] = { Chain, Tmp, FIPtr };
8456 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8457 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8458 } else
8459 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8460
8461 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8462 // add in a bias on big endian.
8463 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8464 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8465 DAG.getConstant(4, dl, FIPtr.getValueType()));
8466 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8467 }
8468
8469 RLI.Chain = Chain;
8470 RLI.Ptr = FIPtr;
8471 RLI.MPI = MPI;
8472 RLI.Alignment = Alignment;
8473}
8474
8475/// Custom lowers floating point to integer conversions to use
8476/// the direct move instructions available in ISA 2.07 to avoid the
8477/// need for load/store combinations.
8478SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8479 SelectionDAG &DAG,
8480 const SDLoc &dl) const {
8481 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8482 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8483 if (Op->isStrictFPOpcode())
8484 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8485 else
8486 return Mov;
8487}
8488
8489SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8490 const SDLoc &dl) const {
8491 bool IsStrict = Op->isStrictFPOpcode();
8492 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8493 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8494 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8495 EVT SrcVT = Src.getValueType();
8496 EVT DstVT = Op.getValueType();
8497
8498 // FP to INT conversions are legal for f128.
8499 if (SrcVT == MVT::f128)
8500 return Subtarget.hasP9Vector() ? Op : SDValue();
8501
8502 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8503 // PPC (the libcall is not available).
8504 if (SrcVT == MVT::ppcf128) {
8505 if (DstVT == MVT::i32) {
8506 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8507 // set other fast-math flags to FP operations in both strict and
8508 // non-strict cases. (FP_TO_SINT, FSUB)
8509 SDNodeFlags Flags;
8510 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8511
8512 if (IsSigned) {
8513 SDValue Lo, Hi;
8514 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8515
8516 // Add the two halves of the long double in round-to-zero mode, and use
8517 // a smaller FP_TO_SINT.
8518 if (IsStrict) {
8519 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8520 DAG.getVTList(MVT::f64, MVT::Other),
8521 {Op.getOperand(0), Lo, Hi}, Flags);
8522 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8523 DAG.getVTList(MVT::i32, MVT::Other),
8524 {Res.getValue(1), Res}, Flags);
8525 } else {
8526 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8527 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8528 }
8529 } else {
8530 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8531 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8532 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8533 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8534 if (IsStrict) {
8535 // Sel = Src < 0x80000000
8536 // FltOfs = select Sel, 0.0, 0x80000000
8537 // IntOfs = select Sel, 0, 0x80000000
8538 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8539 SDValue Chain = Op.getOperand(0);
8540 EVT SetCCVT =
8541 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8542 EVT DstSetCCVT =
8543 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8544 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8545 Chain, true);
8546 Chain = Sel.getValue(1);
8547
8548 SDValue FltOfs = DAG.getSelect(
8549 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8550 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8551
8552 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8553 DAG.getVTList(SrcVT, MVT::Other),
8554 {Chain, Src, FltOfs}, Flags);
8555 Chain = Val.getValue(1);
8556 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8557 DAG.getVTList(DstVT, MVT::Other),
8558 {Chain, Val}, Flags);
8559 Chain = SInt.getValue(1);
8560 SDValue IntOfs = DAG.getSelect(
8561 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8562 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8563 return DAG.getMergeValues({Result, Chain}, dl);
8564 } else {
8565 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8566 // FIXME: generated code sucks.
8567 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8568 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8569 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8570 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8571 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8572 }
8573 }
8574 }
8575
8576 return SDValue();
8577 }
8578
8579 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8580 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8581
8582 ReuseLoadInfo RLI;
8583 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8584
8585 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8586 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8587}
8588
8589// We're trying to insert a regular store, S, and then a load, L. If the
8590// incoming value, O, is a load, we might just be able to have our load use the
8591// address used by O. However, we don't know if anything else will store to
8592// that address before we can load from it. To prevent this situation, we need
8593// to insert our load, L, into the chain as a peer of O. To do this, we give L
8594// the same chain operand as O, we create a token factor from the chain results
8595// of O and L, and we replace all uses of O's chain result with that token
8596// factor (this last part is handled by makeEquivalentMemoryOrdering).
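 // As an illustration (hypothetical names, not from the original comment):
 // if O is "t1 = load @p", L is given O's input chain, and a TokenFactor of
 // the two chain results replaces every former use of t1's chain, so any
 // store that was ordered after O is now also ordered after L.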
8597bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8598 ReuseLoadInfo &RLI,
8599 SelectionDAG &DAG,
8600 ISD::LoadExtType ET) const {
8601 // Conservatively skip reusing for constrained FP nodes.
8602 if (Op->isStrictFPOpcode())
8603 return false;
8604
8605 SDLoc dl(Op);
8606 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8607 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8608 if (ET == ISD::NON_EXTLOAD &&
8609 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8610 isOperationLegalOrCustom(Op.getOpcode(),
8611 Op.getOperand(0).getValueType())) {
8612
8613 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8614 return true;
8615 }
8616
8617 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8618 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8619 LD->isNonTemporal())
8620 return false;
8621 if (LD->getMemoryVT() != MemVT)
8622 return false;
8623
8624 // If the result of the load is an illegal type, then we can't build a
8625 // valid chain for reuse since the legalised loads and token factor node that
8626 // ties the legalised loads together uses a different output chain than the
8627 // illegal load.
8628 if (!isTypeLegal(LD->getValueType(0)))
8629 return false;
8630
8631 RLI.Ptr = LD->getBasePtr();
8632 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8633 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8634 "Non-pre-inc AM on PPC?");
8635 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8636 LD->getOffset());
8637 }
8638
8639 RLI.Chain = LD->getChain();
8640 RLI.MPI = LD->getPointerInfo();
8641 RLI.IsDereferenceable = LD->isDereferenceable();
8642 RLI.IsInvariant = LD->isInvariant();
8643 RLI.Alignment = LD->getAlign();
8644 RLI.AAInfo = LD->getAAInfo();
8645 RLI.Ranges = LD->getRanges();
8646
8647 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8648 return true;
8649}
8650
8651/// Analyze the profitability of a direct move:
8652/// prefer a float load over an int load plus a direct move
8653/// when there is no integer use of the int load.
8654bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8655 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8656 if (Origin->getOpcode() != ISD::LOAD)
8657 return true;
8658
8659 // If there is no LXSIBZX/LXSIHZX, like Power8,
8660 // prefer direct move if the memory size is 1 or 2 bytes.
8661 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8662 if (!Subtarget.hasP9Vector() &&
8663 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8664 return true;
8665
8666 for (SDUse &Use : Origin->uses()) {
8667
8668 // Only look at the users of the loaded value.
8669 if (Use.getResNo() != 0)
8670 continue;
8671
8672 SDNode *User = Use.getUser();
8673 if (User->getOpcode() != ISD::SINT_TO_FP &&
8674 User->getOpcode() != ISD::UINT_TO_FP &&
8675 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8676 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8677 return true;
8678 }
8679
8680 return false;
8681}
8682
8683static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8684 const PPCSubtarget &Subtarget,
8685 SDValue Chain = SDValue()) {
8686 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8687 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8688 SDLoc dl(Op);
8689
8690 // TODO: Any other flags to propagate?
8691 SDNodeFlags Flags;
8692 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8693
8694 // If we have FCFIDS, then use it when converting to single-precision.
8695 // Otherwise, convert to double-precision and then round.
8696 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8697 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8698 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8699 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8700 if (Op->isStrictFPOpcode()) {
8701 if (!Chain)
8702 Chain = Op.getOperand(0);
8703 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8704 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8705 } else
8706 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8707}
8708
8709/// Custom lowers integer to floating point conversions to use
8710/// the direct move instructions available in ISA 2.07 to avoid the
8711/// need for load/store combinations.
8712SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8713 SelectionDAG &DAG,
8714 const SDLoc &dl) const {
8715 assert((Op.getValueType() == MVT::f32 ||
8716 Op.getValueType() == MVT::f64) &&
8717 "Invalid floating point type as target of conversion");
8718 assert(Subtarget.hasFPCVT() &&
8719 "Int to FP conversions with direct moves require FPCVT");
8720 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8721 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8722 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8723 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8724 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8725 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8726 return convertIntToFP(Op, Mov, DAG, Subtarget);
8727}
8728
8729static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8730
8731 EVT VecVT = Vec.getValueType();
8732 assert(VecVT.isVector() && "Expected a vector type.");
8733 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8734
8735 EVT EltVT = VecVT.getVectorElementType();
8736 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8737 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8738
8739 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8740 SmallVector<SDValue, 16> Ops(NumConcat);
8741 Ops[0] = Vec;
8742 SDValue UndefVec = DAG.getUNDEF(VecVT);
8743 for (unsigned i = 1; i < NumConcat; ++i)
8744 Ops[i] = UndefVec;
8745
8746 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8747}
8748
8749SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8750 const SDLoc &dl) const {
8751 bool IsStrict = Op->isStrictFPOpcode();
8752 unsigned Opc = Op.getOpcode();
8753 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8754 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8755 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8756 "Unexpected conversion type");
8757 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8758 "Supports conversions to v2f64/v4f32 only.");
8759
8760 // TODO: Any other flags to propagate?
8761 SDNodeFlags Flags;
8762 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8763
8764 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8765 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8766
8767 SDValue Wide = widenVec(DAG, Src, dl);
8768 EVT WideVT = Wide.getValueType();
8769 unsigned WideNumElts = WideVT.getVectorNumElements();
8770 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8771
8772 SmallVector<int, 16> ShuffV;
8773 for (unsigned i = 0; i < WideNumElts; ++i)
8774 ShuffV.push_back(i + WideNumElts);
8775
8776 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8777 int SaveElts = FourEltRes ? 4 : 2;
8778 if (Subtarget.isLittleEndian())
8779 for (int i = 0; i < SaveElts; i++)
8780 ShuffV[i * Stride] = i;
8781 else
8782 for (int i = 1; i <= SaveElts; i++)
8783 ShuffV[i * Stride - 1] = i - 1;
8784
8785 SDValue ShuffleSrc2 =
8786 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8787 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8788
8789 SDValue Extend;
8790 if (SignedConv) {
8791 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8792 EVT ExtVT = Src.getValueType();
8793 if (Subtarget.hasP9Altivec())
8794 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8795 IntermediateVT.getVectorNumElements());
8796
8797 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8798 DAG.getValueType(ExtVT));
8799 } else
8800 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8801
8802 if (IsStrict)
8803 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8804 {Op.getOperand(0), Extend}, Flags);
8805
8806 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8807}
8808
8809SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8810 SelectionDAG &DAG) const {
8811 SDLoc dl(Op);
8812 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8813 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8814 bool IsStrict = Op->isStrictFPOpcode();
8815 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8816 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8817
8818 // TODO: Any other flags to propagate?
8819 SDNodeFlags Flags;
8820 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8821
8822 EVT InVT = Src.getValueType();
8823 EVT OutVT = Op.getValueType();
8824 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8825 isOperationCustom(Op.getOpcode(), InVT))
8826 return LowerINT_TO_FPVector(Op, DAG, dl);
8827
8828 // Conversions to f128 are legal.
8829 if (Op.getValueType() == MVT::f128)
8830 return Subtarget.hasP9Vector() ? Op : SDValue();
8831
8832 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8833 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8834 return SDValue();
8835
8836 if (Src.getValueType() == MVT::i1) {
8837 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8838 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8839 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8840 if (IsStrict)
8841 return DAG.getMergeValues({Sel, Chain}, dl);
8842 else
8843 return Sel;
8844 }
8845
8846 // If we have direct moves, we can do all the conversion, skip the store/load
8847 // however, without FPCVT we can't do most conversions.
8848 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8849 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8850 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8851
8852 assert((IsSigned || Subtarget.hasFPCVT()) &&
8853 "UINT_TO_FP is supported only with FPCVT");
8854
8855 if (Src.getValueType() == MVT::i64) {
8856 SDValue SINT = Src;
8857 // When converting to single-precision, we actually need to convert
8858 // to double-precision first and then round to single-precision.
8859 // To avoid double-rounding effects during that operation, we have
8860 // to prepare the input operand. Bits that might be truncated when
8861 // converting to double-precision are replaced by a bit that won't
8862 // be lost at this stage, but is below the single-precision rounding
8863 // position.
8864 //
8865 // However, if -enable-unsafe-fp-math is in effect, accept double
8866 // rounding to avoid the extra overhead.
8867 if (Op.getValueType() == MVT::f32 &&
8868 !Subtarget.hasFPCVT() &&
8869 !DAG.getTarget().Options.UnsafeFPMath) {
8870
8871 // Twiddle input to make sure the low 11 bits are zero. (If this
8872 // is the case, we are guaranteed the value will fit into the 53 bit
8873 // mantissa of an IEEE double-precision value without rounding.)
8874 // If any of those low 11 bits were not zero originally, make sure
8875 // bit 12 (value 2048) is set instead, so that the final rounding
8876 // to single-precision gets the correct result.
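 // (Illustrative restatement) ((SINT & 2047) + 2047) has bit 11 set exactly
 // when any of the low 11 bits of SINT are nonzero; after OR-ing with SINT
 // and masking with ~2047, the low 11 bits are cleared and bit 11 serves as
 // a sticky bit for them.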
8877 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8878 SINT, DAG.getConstant(2047, dl, MVT::i64));
8879 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8880 Round, DAG.getConstant(2047, dl, MVT::i64));
8881 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8882 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8883 Round, DAG.getConstant(-2048, dl, MVT::i64));
8884
8885 // However, we cannot use that value unconditionally: if the magnitude
8886 // of the input value is small, the bit-twiddling we did above might
8887 // end up visibly changing the output. Fortunately, in that case, we
8888 // don't need to twiddle bits since the original input will convert
8889 // exactly to double-precision floating-point already. Therefore,
8890 // construct a conditional to use the original value if the top 11
8891 // bits are all sign-bit copies, and use the rounded value computed
8892 // above otherwise.
8893 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8894 SINT, DAG.getConstant(53, dl, MVT::i32));
8895 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8896 Cond, DAG.getConstant(1, dl, MVT::i64));
8897 Cond = DAG.getSetCC(
8898 dl,
8899 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8900 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8901
8902 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8903 }
8904
8905 ReuseLoadInfo RLI;
8906 SDValue Bits;
8908 MachineFunction &MF = DAG.getMachineFunction();
8909 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8910 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8911 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8912 if (RLI.ResChain)
8913 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8914 } else if (Subtarget.hasLFIWAX() &&
8915 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8916 MachineMemOperand *MMO =
8917 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8918 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8919 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8920 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8921 DAG.getVTList(MVT::f64, MVT::Other),
8922 Ops, MVT::i32, MMO);
8923 if (RLI.ResChain)
8924 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8925 } else if (Subtarget.hasFPCVT() &&
8926 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8927 MachineMemOperand *MMO =
8928 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8929 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8930 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8931 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8932 DAG.getVTList(MVT::f64, MVT::Other),
8933 Ops, MVT::i32, MMO);
8934 if (RLI.ResChain)
8935 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8936 } else if (((Subtarget.hasLFIWAX() &&
8937 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8938 (Subtarget.hasFPCVT() &&
8939 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8940 SINT.getOperand(0).getValueType() == MVT::i32) {
8941 MachineFrameInfo &MFI = MF.getFrameInfo();
8942 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8943
8944 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8945 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8946
8947 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8948 MachinePointerInfo::getFixedStack(
8949 DAG.getMachineFunction(), FrameIdx));
8950 Chain = Store;
8951
8952 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8953 "Expected an i32 store");
8954
8955 RLI.Ptr = FIdx;
8956 RLI.Chain = Chain;
8957 RLI.MPI =
8958 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8959 RLI.Alignment = Align(4);
8960
8961 MachineMemOperand *MMO =
8962 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8963 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8964 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8965 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8966 PPCISD::LFIWZX : PPCISD::LFIWAX,
8967 dl, DAG.getVTList(MVT::f64, MVT::Other),
8968 Ops, MVT::i32, MMO);
8969 Chain = Bits.getValue(1);
8970 } else
8971 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8972
8973 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8974 if (IsStrict)
8975 Chain = FP.getValue(1);
8976
8977 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8978 if (IsStrict)
8979 FP = DAG.getNode(
8980 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8981 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8982 Flags);
8983 else
8984 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8985 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8986 }
8987 return FP;
8988 }
8989
8990 assert(Src.getValueType() == MVT::i32 &&
8991 "Unhandled INT_TO_FP type in custom expander!");
8992 // Since we only generate this in 64-bit mode, we can take advantage of
8993 // 64-bit registers. In particular, sign extend the input value into the
8994 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8995 // then lfd it and fcfid it.
8996 MachineFunction &MF = DAG.getMachineFunction();
8997 MachineFrameInfo &MFI = MF.getFrameInfo();
8998 EVT PtrVT = getPointerTy(MF.getDataLayout());
8999
9000 SDValue Ld;
9001 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9002 ReuseLoadInfo RLI;
9003 bool ReusingLoad;
9004 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9005 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9006 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9007
9008 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9009 MachinePointerInfo::getFixedStack(
9010 DAG.getMachineFunction(), FrameIdx));
9011 Chain = Store;
9012
9013 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9014 "Expected an i32 store");
9015
9016 RLI.Ptr = FIdx;
9017 RLI.Chain = Chain;
9018 RLI.MPI =
9019 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9020 RLI.Alignment = Align(4);
9021 }
9022
9023 MachineMemOperand *MMO =
9024 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9025 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9026 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9027 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9028 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9029 MVT::i32, MMO);
9030 Chain = Ld.getValue(1);
9031 if (ReusingLoad && RLI.ResChain) {
9032 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
9033 }
9034 } else {
9035 assert(Subtarget.isPPC64() &&
9036 "i32->FP without LFIWAX supported only on PPC64");
9037
9038 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9039 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9040
9041 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9042
9043 // STD the extended value into the stack slot.
9044 SDValue Store = DAG.getStore(
9045 Chain, dl, Ext64, FIdx,
9046 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9047 Chain = Store;
9048
9049 // Load the value as a double.
9050 Ld = DAG.getLoad(
9051 MVT::f64, dl, Chain, FIdx,
9052 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9053 Chain = Ld.getValue(1);
9054 }
9055
9056 // FCFID it and return it.
9057 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9058 if (IsStrict)
9059 Chain = FP.getValue(1);
9060 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9061 if (IsStrict)
9062 FP = DAG.getNode(
9063 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9064 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9065 else
9066 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9067 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9068 }
9069 return FP;
9070}
9071
9072SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9073 SelectionDAG &DAG) const {
9074 SDLoc Dl(Op);
9075 MachineFunction &MF = DAG.getMachineFunction();
9076 EVT PtrVT = getPointerTy(MF.getDataLayout());
9077 SDValue Chain = Op.getOperand(0);
9078
9079 // If requested mode is constant, just use simpler mtfsb/mffscrni
9080 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9081 uint64_t Mode = CVal->getZExtValue();
9082 assert(Mode < 4 && "Unsupported rounding mode!");
9083 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9084 if (Subtarget.isISA3_0())
9085 return SDValue(
9086 DAG.getMachineNode(
9087 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9088 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9089 1);
9090 SDNode *SetHi = DAG.getMachineNode(
9091 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9092 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9093 SDNode *SetLo = DAG.getMachineNode(
9094 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9095 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9096 return SDValue(SetLo, 0);
9097 }
9098
9099 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
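 // Illustrative mapping (LLVM rounding-mode argument -> PPC RN field):
 //   0 (toward zero) -> 01,  1 (to nearest) -> 00,
 //   2 (toward +inf) -> 10,  3 (toward -inf) -> 11.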
9100 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9101 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9102 DAG.getConstant(3, Dl, MVT::i32));
9103 SDValue DstFlag = DAG.getNode(
9104 ISD::XOR, Dl, MVT::i32, SrcFlag,
9105 DAG.getNode(ISD::AND, Dl, MVT::i32,
9106 DAG.getNOT(Dl,
9107 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9108 MVT::i32),
9109 One));
9110 // For Power9, there's the faster mffscrn, so we don't need to read the FPSCR.
9111 SDValue MFFS;
9112 if (!Subtarget.isISA3_0()) {
9113 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9114 Chain = MFFS.getValue(1);
9115 }
9116 SDValue NewFPSCR;
9117 if (Subtarget.isPPC64()) {
9118 if (Subtarget.isISA3_0()) {
9119 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9120 } else {
9121 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9122 SDNode *InsertRN = DAG.getMachineNode(
9123 PPC::RLDIMI, Dl, MVT::i64,
9124 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9125 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9126 DAG.getTargetConstant(0, Dl, MVT::i32),
9127 DAG.getTargetConstant(62, Dl, MVT::i32)});
9128 NewFPSCR = SDValue(InsertRN, 0);
9129 }
9130 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9131 } else {
9132 // In 32-bit mode, store f64, load and update the lower half.
9133 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9134 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9135 SDValue Addr = Subtarget.isLittleEndian()
9136 ? StackSlot
9137 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9138 DAG.getConstant(4, Dl, PtrVT));
9139 if (Subtarget.isISA3_0()) {
9140 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9141 } else {
9142 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9143 SDValue Tmp =
9144 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9145 Chain = Tmp.getValue(1);
9146 Tmp = SDValue(DAG.getMachineNode(
9147 PPC::RLWIMI, Dl, MVT::i32,
9148 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9149 DAG.getTargetConstant(30, Dl, MVT::i32),
9150 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9151 0);
9152 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9153 }
9154 NewFPSCR =
9155 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9156 Chain = NewFPSCR.getValue(1);
9157 }
9158 if (Subtarget.isISA3_0())
9159 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9160 {NewFPSCR, Chain}),
9161 1);
9162 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9163 SDNode *MTFSF = DAG.getMachineNode(
9164 PPC::MTFSF, Dl, MVT::Other,
9165 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9166 return SDValue(MTFSF, 0);
9167}
9168
9169SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9170 SelectionDAG &DAG) const {
9171 SDLoc dl(Op);
9172 /*
9173 The rounding mode is in bits 30:31 of the FPSCR, and has the following
9174 settings:
9175 00 Round to nearest
9176 01 Round to 0
9177 10 Round to +inf
9178 11 Round to -inf
9179
9180 GET_ROUNDING, on the other hand, expects the following:
9181 -1 Undefined
9182 0 Round to 0
9183 1 Round to nearest
9184 2 Round to +inf
9185 3 Round to -inf
9186
9187 To perform the conversion, we do:
9188 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9189 */
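 // As a quick check of the formula above (illustrative): for FPSCR RN = 01
 // (round toward zero), (1 ^ ((~1 & 0x3) >> 1)) = 1 ^ 1 = 0, matching the
 // GET_ROUNDING encoding for "Round to 0".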
9190
9191 MachineFunction &MF = DAG.getMachineFunction();
9192 EVT VT = Op.getValueType();
9193 EVT PtrVT = getPointerTy(MF.getDataLayout());
9194
9195 // Save FP Control Word to register
9196 SDValue Chain = Op.getOperand(0);
9197 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9198 Chain = MFFS.getValue(1);
9199
9200 SDValue CWD;
9201 if (isTypeLegal(MVT::i64)) {
9202 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9203 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9204 } else {
9205 // Save FP register to stack slot
9206 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9207 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9208 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9209
9210 // Load FP Control Word from low 32 bits of stack slot.
9211 assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9212 "Stack slot adjustment is valid only on big endian subtargets!");
9213 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9214 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9215 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9216 Chain = CWD.getValue(1);
9217 }
9218
9219 // Transform as necessary
9220 SDValue CWD1 =
9221 DAG.getNode(ISD::AND, dl, MVT::i32,
9222 CWD, DAG.getConstant(3, dl, MVT::i32));
9223 SDValue CWD2 =
9224 DAG.getNode(ISD::SRL, dl, MVT::i32,
9225 DAG.getNode(ISD::AND, dl, MVT::i32,
9226 DAG.getNode(ISD::XOR, dl, MVT::i32,
9227 CWD, DAG.getConstant(3, dl, MVT::i32)),
9228 DAG.getConstant(3, dl, MVT::i32)),
9229 DAG.getConstant(1, dl, MVT::i32));
9230
9231 SDValue RetVal =
9232 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9233
9234 RetVal =
9235 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9236 dl, VT, RetVal);
9237
9238 return DAG.getMergeValues({RetVal, Chain}, dl);
9239}
9240
9241SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9242 EVT VT = Op.getValueType();
9243 unsigned BitWidth = VT.getSizeInBits();
9244 SDLoc dl(Op);
9245 assert(Op.getNumOperands() == 3 &&
9246 VT == Op.getOperand(1).getValueType() &&
9247 "Unexpected SHL!");
9248
9249 // Expand into a bunch of logical ops. Note that these ops
9250 // depend on the PPC behavior for oversized shift amounts.
9251 SDValue Lo = Op.getOperand(0);
9252 SDValue Hi = Op.getOperand(1);
9253 SDValue Amt = Op.getOperand(2);
9254 EVT AmtVT = Amt.getValueType();
9255
9256 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9257 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9258 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9259 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9260 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9261 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9262 DAG.getConstant(-BitWidth, dl, AmtVT));
9263 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9264 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9265 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9266 SDValue OutOps[] = { OutLo, OutHi };
9267 return DAG.getMergeValues(OutOps, dl);
9268}
9269
9270SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9271 EVT VT = Op.getValueType();
9272 SDLoc dl(Op);
9273 unsigned BitWidth = VT.getSizeInBits();
9274 assert(Op.getNumOperands() == 3 &&
9275 VT == Op.getOperand(1).getValueType() &&
9276 "Unexpected SRL!");
9277
9278 // Expand into a bunch of logical ops. Note that these ops
9279 // depend on the PPC behavior for oversized shift amounts.
9280 SDValue Lo = Op.getOperand(0);
9281 SDValue Hi = Op.getOperand(1);
9282 SDValue Amt = Op.getOperand(2);
9283 EVT AmtVT = Amt.getValueType();
9284
9285 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9286 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9287 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9288 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9289 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9290 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9291 DAG.getConstant(-BitWidth, dl, AmtVT));
9292 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9293 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9294 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9295 SDValue OutOps[] = { OutLo, OutHi };
9296 return DAG.getMergeValues(OutOps, dl);
9297}
9298
9299SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9300 SDLoc dl(Op);
9301 EVT VT = Op.getValueType();
9302 unsigned BitWidth = VT.getSizeInBits();
9303 assert(Op.getNumOperands() == 3 &&
9304 VT == Op.getOperand(1).getValueType() &&
9305 "Unexpected SRA!");
9306
9307 // Expand into a bunch of logical ops, followed by a select_cc.
9308 SDValue Lo = Op.getOperand(0);
9309 SDValue Hi = Op.getOperand(1);
9310 SDValue Amt = Op.getOperand(2);
9311 EVT AmtVT = Amt.getValueType();
9312
9313 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9314 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9315 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9316 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9317 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9318 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9319 DAG.getConstant(-BitWidth, dl, AmtVT));
9320 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9321 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9322 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9323 Tmp4, Tmp6, ISD::SETLE);
9324 SDValue OutOps[] = { OutLo, OutHi };
9325 return DAG.getMergeValues(OutOps, dl);
9326}
9327
9328SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9329 SelectionDAG &DAG) const {
9330 SDLoc dl(Op);
9331 EVT VT = Op.getValueType();
9332 unsigned BitWidth = VT.getSizeInBits();
9333
9334 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9335 SDValue X = Op.getOperand(0);
9336 SDValue Y = Op.getOperand(1);
9337 SDValue Z = Op.getOperand(2);
9338 EVT AmtVT = Z.getValueType();
9339
9340 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9341 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9342 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9343 // on PowerPC shift by BW being well defined.
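 // For instance (illustrative): fshl i64 X, Y, 0 gives Z = 0 and SubZ = 64;
 // a 64-bit PPC shift by 64 yields 0, so the result is X | 0 = X, exactly
 // what a zero-amount funnel shift requires.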
9344 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9345 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9346 SDValue SubZ =
9347 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9348 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9349 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9350 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9351}
9352
9353//===----------------------------------------------------------------------===//
9354// Vector related lowering.
9355//
9356
9357/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9358/// element size of SplatSize. Cast the result to VT.
9359static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9360 SelectionDAG &DAG, const SDLoc &dl) {
9361 static const MVT VTys[] = { // canonical VT to use for each size.
9362 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9363 };
9364
9365 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9366
9367 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
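 // (Illustrative) e.g. a v8i16 splat of 0xFFFF is rebuilt as a v16i8 splat
 // of 0xFF and then bitcast back to the requested type.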
9368 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9369 SplatSize = 1;
9370 Val = 0xFF;
9371 }
9372
9373 EVT CanonicalVT = VTys[SplatSize-1];
9374
9375 // Build a canonical splat for this value.
9376 // Explicitly truncate APInt here, as this API is used with a mix of
9377 // signed and unsigned values.
9378 return DAG.getBitcast(
9379 ReqVT,
9380 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9381}
9382
9383/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9384/// specified intrinsic ID.
9385static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9386 const SDLoc &dl, EVT DestVT = MVT::Other) {
9387 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9388 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9389 DAG.getConstant(IID, dl, MVT::i32), Op);
9390}
9391
9392/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9393/// specified intrinsic ID.
9394static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9395 SelectionDAG &DAG, const SDLoc &dl,
9396 EVT DestVT = MVT::Other) {
9397 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9398 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9399 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9400}
9401
9402/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9403/// specified intrinsic ID.
9404static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9405 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9406 EVT DestVT = MVT::Other) {
9407 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9408 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9409 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9410}
9411
9412/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9413/// amount. The result has the specified value type.
9414static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9415 SelectionDAG &DAG, const SDLoc &dl) {
9416 // Force LHS/RHS to be the right type.
9417 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9418 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9419
9420 int Ops[16];
9421 for (unsigned i = 0; i != 16; ++i)
9422 Ops[i] = i + Amt;
9423 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9424 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9425}
9426
9427/// Do we have an efficient pattern in a .td file for this node?
9428///
9429/// \param V - pointer to the BuildVectorSDNode being matched
9430/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9431///
9432/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9433/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9434/// the opposite is true (expansion is beneficial) are:
9435/// - The node builds a vector out of integers that are not 32 or 64-bits
9436/// - The node builds a vector out of constants
9437/// - The node is a "load-and-splat"
9438/// In all other cases, we will choose to keep the BUILD_VECTOR.
9439static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9440 bool HasDirectMove,
9441 bool HasP8Vector) {
9442 EVT VecVT = V->getValueType(0);
9443 bool RightType = VecVT == MVT::v2f64 ||
9444 (HasP8Vector && VecVT == MVT::v4f32) ||
9445 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9446 if (!RightType)
9447 return false;
9448
9449 bool IsSplat = true;
9450 bool IsLoad = false;
9451 SDValue Op0 = V->getOperand(0);
9452
9453 // This function is called in a block that confirms the node is not a constant
9454 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9455 // different constants.
9456 if (V->isConstant())
9457 return false;
9458 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9459 if (V->getOperand(i).isUndef())
9460 return false;
9461 // We want to expand nodes that represent load-and-splat even if the
9462 // loaded value is a floating point truncation or conversion to int.
9463 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9464 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9465 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9466 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9467 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9468 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9469 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9470 IsLoad = true;
9471 // If the operands are different or the input is not a load and has more
9472 // uses than just this BV node, then it isn't a splat.
9473 if (V->getOperand(i) != Op0 ||
9474 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9475 IsSplat = false;
9476 }
9477 return !(IsSplat && IsLoad);
9478}
9479
9480// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9481SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9482
9483 SDLoc dl(Op);
9484 SDValue Op0 = Op->getOperand(0);
9485
9486 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9487 (Op.getValueType() != MVT::f128))
9488 return SDValue();
9489
9490 SDValue Lo = Op0.getOperand(0);
9491 SDValue Hi = Op0.getOperand(1);
9492 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9493 return SDValue();
9494
9495 if (!Subtarget.isLittleEndian())
9496 std::swap(Lo, Hi);
9497
9498 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9499}
9500
9501static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9502 const SDValue *InputLoad = &Op;
9503 while (InputLoad->getOpcode() == ISD::BITCAST)
9504 InputLoad = &InputLoad->getOperand(0);
9505 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9506 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9507 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9508 InputLoad = &InputLoad->getOperand(0);
9509 }
9510 if (InputLoad->getOpcode() != ISD::LOAD)
9511 return nullptr;
9512 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9513 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9514}
9515
9516// Convert the argument APFloat to a single precision APFloat if there is no
9517// loss in information during the conversion to single precision APFloat and the
9518 // resulting number is not a denormal number. Return true if successful.
9519bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9520 APFloat APFloatToConvert = ArgAPFloat;
9521 bool LosesInfo = true;
9522 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9523 &LosesInfo);
9524 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9525 if (Success)
9526 ArgAPFloat = APFloatToConvert;
9527 return Success;
9528}
9529
9530// Bitcast the argument APInt to a double and convert it to a single precision
9531// APFloat, bitcast the APFloat to an APInt and assign it to the original
9532// argument if there is no loss in information during the conversion from
9533// double to single precision APFloat and the resulting number is not a denormal
9534// number. Return true if successful.
9535bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9536 double DpValue = ArgAPInt.bitsToDouble();
9537 APFloat APFloatDp(DpValue);
9538 bool Success = convertToNonDenormSingle(APFloatDp);
9539 if (Success)
9540 ArgAPInt = APFloatDp.bitcastToAPInt();
9541 return Success;
9542}
9543
9544 // Nondestructive check for convertToNonDenormSingle.
9545bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9546 // Only convert if it loses info, since XXSPLTIDP should
9547 // handle the other case.
9548 APFloat APFloatToConvert = ArgAPFloat;
9549 bool LosesInfo = true;
9550 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9551 &LosesInfo);
9552
9553 return (!LosesInfo && !APFloatToConvert.isDenormal());
9554}
9555
9556static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9557 unsigned &Opcode) {
9558 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9559 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9560 return false;
9561
9562 EVT Ty = Op->getValueType(0);
9563 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9564 // as we cannot handle extending loads for these types.
9565 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9566 ISD::isNON_EXTLoad(InputNode))
9567 return true;
9568
9569 EVT MemVT = InputNode->getMemoryVT();
9570 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9571 // memory VT is the same vector element VT type.
9572 // The loads feeding into the v8i16 and v16i8 types will be extending because
9573 // scalar i8/i16 are not legal types.
9574 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9575 (MemVT == Ty.getVectorElementType()))
9576 return true;
9577
9578 if (Ty == MVT::v2i64) {
9579 // Check the extend type, when the input type is i32, and the output vector
9580 // type is v2i64.
9581 if (MemVT == MVT::i32) {
9582 if (ISD::isZEXTLoad(InputNode))
9583 Opcode = PPCISD::ZEXT_LD_SPLAT;
9584 if (ISD::isSEXTLoad(InputNode))
9585 Opcode = PPCISD::SEXT_LD_SPLAT;
9586 }
9587 return true;
9588 }
9589 return false;
9590}
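// Illustration of the predicate above (hypothetical nodes): a v2i64
// BUILD_VECTOR splat fed by a zero-extending i32 load selects
// PPCISD::ZEXT_LD_SPLAT, while a v4f32 splat fed by a plain (non-extending)
// f32 load keeps the caller's default opcode.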
9591
9592// If this is a case we can't handle, return null and let the default
9593// expansion code take care of it. If we CAN select this case, and if it
9594// selects to a single instruction, return Op. Otherwise, if we can codegen
9595// this case more efficiently than a constant pool load, lower it to the
9596// sequence of ops that should be used.
9597SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9598 SelectionDAG &DAG) const {
9599 SDLoc dl(Op);
9600 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9601 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9602
9603 // Check if this is a splat of a constant value.
9604 APInt APSplatBits, APSplatUndef;
9605 unsigned SplatBitSize;
9606 bool HasAnyUndefs;
9607 bool BVNIsConstantSplat =
9608 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9609 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9610
9611 // If it is a splat of a double, check if we can shrink it to a 32 bit
9612 // non-denormal float which when converted back to double gives us the same
9613 // double. This is to exploit the XXSPLTIDP instruction.
9614 // If we lose precision, we use XXSPLTI32DX.
9615 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9616 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9617 // Check the type first to short-circuit so we don't modify APSplatBits if
9618 // this block isn't executed.
9619 if ((Op->getValueType(0) == MVT::v2f64) &&
9620 convertToNonDenormSingle(APSplatBits)) {
9621 SDValue SplatNode = DAG.getNode(
9622 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9623 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9624 return DAG.getBitcast(Op.getValueType(), SplatNode);
9625 } else {
9626 // We may lose precision, so we have to use XXSPLTI32DX.
9627
9628 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9629 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9630 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9631
9632 if (!Hi || !Lo)
9633 // If either 32-bit half is 0, then we should generate XXLXOR to set to 0.
9634 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9635
9636 if (Hi)
9637 SplatNode = DAG.getNode(
9638 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9639 DAG.getTargetConstant(0, dl, MVT::i32),
9640 DAG.getTargetConstant(Hi, dl, MVT::i32));
9641
9642 if (Lo)
9643 SplatNode =
9644 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9645 DAG.getTargetConstant(1, dl, MVT::i32),
9646 DAG.getTargetConstant(Lo, dl, MVT::i32));
9647
9648 return DAG.getBitcast(Op.getValueType(), SplatNode);
9649 }
9650 }
9651
9652 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9653 unsigned NewOpcode = PPCISD::LD_SPLAT;
9654
9655 // Handle load-and-splat patterns as we have instructions that will do this
9656 // in one go.
9657 if (DAG.isSplatValue(Op, true) &&
9658 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9659 const SDValue *InputLoad = &Op.getOperand(0);
9660 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9661
9662 // If the input load is an extending load, it will be an i32 -> i64
9663 // extending load and isValidSplatLoad() will update NewOpcode.
9664 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9665 unsigned ElementSize =
9666 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9667
9668 assert(((ElementSize == 2 * MemorySize)
9669 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9670 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9671 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9672 "Unmatched element size and opcode!\n");
9673
9674 // Checking for a single use of this load, we have to check for vector
9675 // width (128 bits) / ElementSize uses (since each operand of the
9676 // BUILD_VECTOR is a separate use of the value).
9677 unsigned NumUsesOfInputLD = 128 / ElementSize;
9678 for (SDValue BVInOp : Op->ops())
9679 if (BVInOp.isUndef())
9680 NumUsesOfInputLD--;
9681
9682 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9683 // The cases below should also apply to "lfiwzx/lfiwax + LE target + index
9684 // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9685 // 15", but isValidSplatLoad() currently only returns true when the
9686 // element at index 0 is a load, so we will not get into trouble for
9687 // these cases.
9688 //
9689 // case 1 - lfiwzx/lfiwax
9690 // 1.1: load result is i32 and is sign/zero extend to i64;
9691 // 1.2: build a v2i64 vector type with above loaded value;
9692 // 1.3: the vector has only one value at index 0, others are all undef;
9693 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9694 if (NumUsesOfInputLD == 1 &&
9695 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9696 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9697 Subtarget.hasLFIWAX()))
9698 return SDValue();
9699
9700 // case 2 - lxvr[hb]x
9701 // 2.1: load result is at most i16;
9702 // 2.2: build a vector with above loaded value;
9703 // 2.3: the vector has only one value at index 0, others are all undef;
9704 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9705 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9706 Subtarget.isISA3_1() && ElementSize <= 16)
9707 return SDValue();
9708
9709 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9710 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9711 Subtarget.hasVSX()) {
9712 SDValue Ops[] = {
9713 LD->getChain(), // Chain
9714 LD->getBasePtr(), // Ptr
9715 DAG.getValueType(Op.getValueType()) // VT
9716 };
9717 SDValue LdSplt = DAG.getMemIntrinsicNode(
9718 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9719 LD->getMemoryVT(), LD->getMemOperand());
9720 // Replace all uses of the output chain of the original load with the
9721 // output chain of the new load.
9722 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9723 LdSplt.getValue(1));
9724 return LdSplt;
9725 }
9726 }
9727
9728 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9729 // 32-bits can be lowered to VSX instructions under certain conditions.
9730 // Without VSX, there is no pattern more efficient than expanding the node.
9731 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9732 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9733 Subtarget.hasP8Vector()))
9734 return Op;
9735 return SDValue();
9736 }
9737
9738 uint64_t SplatBits = APSplatBits.getZExtValue();
9739 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9740 unsigned SplatSize = SplatBitSize / 8;
9741
9742 // First, handle single instruction cases.
9743
9744 // All zeros?
9745 if (SplatBits == 0) {
9746 // Canonicalize all zero vectors to be v4i32.
9747 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9748 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9749 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9750 }
9751 return Op;
9752 }
9753
9754 // We have XXSPLTIW for constant splats four bytes wide.
9755 // Since the vector length is a multiple of 4, 2-byte splats can be replaced
9756 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9757 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9758 // turned into a 4-byte splat of 0xABABABAB.
9759 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9760 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9761 Op.getValueType(), DAG, dl);
9762
9763 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9764 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9765 dl);
9766
9767 // We have XXSPLTIB for constant splats one byte wide.
9768 if (Subtarget.hasP9Vector() && SplatSize == 1)
9769 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9770 dl);
9771
9772 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9773 int32_t SextVal = SignExtend32(SplatBits, SplatBitSize);
9774 if (SextVal >= -16 && SextVal <= 15)
9775 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9776 dl);
9777
9778 // Two instruction sequences.
9779
9780 // If this value is in the range [-32,30] and is even, use:
9781 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9782 // If this value is in the range [17,31] and is odd, use:
9783 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9784 // If this value is in the range [-31,-17] and is odd, use:
9785 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9786 // Note the last two are three-instruction sequences.
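// For example (illustrative values): 30 = vsplti(15) + vsplti(15),
// 27 = vsplti(11) - vsplti(-16), and -27 = vsplti(-11) + vsplti(-16).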
9787 if (SextVal >= -32 && SextVal <= 31) {
9788 // To avoid having these optimizations undone by constant folding,
9789 // we convert to a pseudo that will be expanded later into one of
9790 // the above forms.
9791 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9792 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9793 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9794 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9795 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9796 if (VT == Op.getValueType())
9797 return RetVal;
9798 else
9799 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9800 }
9801
9802 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9803 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9804 // for fneg/fabs.
9805 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9806 // Make -1 and vspltisw -1:
9807 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9808
9809 // Make the VSLW intrinsic, computing 0x8000_0000.
9810 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9811 OnesV, DAG, dl);
9812
9813 // xor by OnesV to invert it.
9814 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9815 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9816 }
9817
9818 // Check to see if this is a wide variety of vsplti*, binop self cases.
9819 static const signed char SplatCsts[] = {
9820 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9821 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9822 };
9823
9824 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9825 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9826 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9827 int i = SplatCsts[idx];
9828
9829 // Figure out what shift amount will be used by altivec if shifted by i in
9830 // this splat size.
9831 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9832
9833 // vsplti + shl self.
9834 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9835 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9836 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9837 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9838 Intrinsic::ppc_altivec_vslw
9839 };
9840 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9841 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9842 }
9843
9844 // vsplti + srl self.
9845 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9846 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9847 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9848 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9849 Intrinsic::ppc_altivec_vsrw
9850 };
9851 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9852 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9853 }
9854
9855 // vsplti + rol self.
9856 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9857 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9858 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9859 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9860 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9861 Intrinsic::ppc_altivec_vrlw
9862 };
9863 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9864 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9865 }
9866
9867 // t = vsplti c, result = vsldoi t, t, 1
9868 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9869 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9870 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9871 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9872 }
9873 // t = vsplti c, result = vsldoi t, t, 2
9874 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9875 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9876 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9877 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9878 }
9879 // t = vsplti c, result = vsldoi t, t, 3
9880 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9881 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9882 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9883 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9884 }
9885 }
9886
9887 return SDValue();
9888}
9889
9890/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9891/// the specified operations to build the shuffle.
9892static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9893 SDValue RHS, SelectionDAG &DAG,
9894 const SDLoc &dl) {
9895 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9896 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9897 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
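// Layout assumed by the decoding above: bits 31-30 hold the cost (consumed
// by the caller), bits 29-26 the operation, bits 25-13 the LHS id and bits
// 12-0 the RHS id. Each id encodes four source elements as base-9 digits,
// ((a*9+b)*9+c)*9+d with 8 meaning undef; e.g. <0,1,2,3> encodes to 102,
// which is the OP_COPY check below.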
9898
9899 enum {
9900 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9901 OP_VMRGHW,
9902 OP_VMRGLW,
9903 OP_VSPLTISW0,
9904 OP_VSPLTISW1,
9905 OP_VSPLTISW2,
9906 OP_VSPLTISW3,
9907 OP_VSLDOI4,
9908 OP_VSLDOI8,
9909 OP_VSLDOI12
9910 };
9911
9912 if (OpNum == OP_COPY) {
9913 if (LHSID == (1*9+2)*9+3) return LHS;
9914 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9915 return RHS;
9916 }
9917
9918 SDValue OpLHS, OpRHS;
9919 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9920 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9921
9922 int ShufIdxs[16];
9923 switch (OpNum) {
9924 default: llvm_unreachable("Unknown i32 permute!");
9925 case OP_VMRGHW:
9926 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9927 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9928 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9929 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9930 break;
9931 case OP_VMRGLW:
9932 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9933 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9934 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9935 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9936 break;
9937 case OP_VSPLTISW0:
9938 for (unsigned i = 0; i != 16; ++i)
9939 ShufIdxs[i] = (i&3)+0;
9940 break;
9941 case OP_VSPLTISW1:
9942 for (unsigned i = 0; i != 16; ++i)
9943 ShufIdxs[i] = (i&3)+4;
9944 break;
9945 case OP_VSPLTISW2:
9946 for (unsigned i = 0; i != 16; ++i)
9947 ShufIdxs[i] = (i&3)+8;
9948 break;
9949 case OP_VSPLTISW3:
9950 for (unsigned i = 0; i != 16; ++i)
9951 ShufIdxs[i] = (i&3)+12;
9952 break;
9953 case OP_VSLDOI4:
9954 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9955 case OP_VSLDOI8:
9956 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9957 case OP_VSLDOI12:
9958 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9959 }
9960 EVT VT = OpLHS.getValueType();
9961 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9962 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9963 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9964 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9965}
9966
9967/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9968/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9969/// SDValue.
9970SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9971 SelectionDAG &DAG) const {
9972 const unsigned BytesInVector = 16;
9973 bool IsLE = Subtarget.isLittleEndian();
9974 SDLoc dl(N);
9975 SDValue V1 = N->getOperand(0);
9976 SDValue V2 = N->getOperand(1);
9977 unsigned ShiftElts = 0, InsertAtByte = 0;
9978 bool Swap = false;
9979
9980 // Shifts required to get the byte we want at element 7 (BE) or 8 (LE).
9981 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9982 0, 15, 14, 13, 12, 11, 10, 9};
9983 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9984 1, 2, 3, 4, 5, 6, 7, 8};
9985
9986 ArrayRef<int> Mask = N->getMask();
9987 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9988
9989 // For each mask element, find out if we're just inserting something
9990 // from V2 into V1 or vice versa.
9991 // Possible permutations inserting an element from V2 into V1:
9992 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9993 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9994 // ...
9995 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9996 // Inserting from V1 into V2 will be similar, except mask range will be
9997 // [16,31].
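// For example (illustrative masks): <16,1,2,...,15> inserts byte 0 of V2 into
// byte 0 of V1, and with V2 undef a big-endian mask of <7,1,2,...,15> inserts
// V1's own byte 7 into byte 0 without any shift.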
9998
9999 bool FoundCandidate = false;
10000 // If both vector operands for the shuffle are the same vector, the mask
10001 // will contain only elements from the first one and the second one will be
10002 // undef.
10003 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10004 // Go through the mask of bytes to find an element that's being moved
10005 // from one vector to the other.
10006 for (unsigned i = 0; i < BytesInVector; ++i) {
10007 unsigned CurrentElement = Mask[i];
10008 // If 2nd operand is undefined, we should only look for element 7 in the
10009 // Mask.
10010 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10011 continue;
10012
10013 bool OtherElementsInOrder = true;
10014 // Examine the other elements in the Mask to see if they're in original
10015 // order.
10016 for (unsigned j = 0; j < BytesInVector; ++j) {
10017 if (j == i)
10018 continue;
10019 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
10020 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
10021 // in which case we assume we're always picking from the 1st operand.
10022 int MaskOffset =
10023 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10024 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10025 OtherElementsInOrder = false;
10026 break;
10027 }
10028 }
10029 // If other elements are in original order, we record the number of shifts
10030 // we need to get the element we want into element 7. Also record which byte
10031 // in the vector we should insert into.
10032 if (OtherElementsInOrder) {
10033 // If 2nd operand is undefined, we assume no shifts and no swapping.
10034 if (V2.isUndef()) {
10035 ShiftElts = 0;
10036 Swap = false;
10037 } else {
10038 // Only need the last 4 bits for the shift because the operands will be swapped if CurrentElement is >= 2^4.
10039 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10040 : BigEndianShifts[CurrentElement & 0xF];
10041 Swap = CurrentElement < BytesInVector;
10042 }
10043 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10044 FoundCandidate = true;
10045 break;
10046 }
10047 }
10048
10049 if (!FoundCandidate)
10050 return SDValue();
10051
10052 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10053 // optionally with VECSHL if shift is required.
10054 if (Swap)
10055 std::swap(V1, V2);
10056 if (V2.isUndef())
10057 V2 = V1;
10058 if (ShiftElts) {
10059 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10060 DAG.getConstant(ShiftElts, dl, MVT::i32));
10061 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10062 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10063 }
10064 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10065 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10066}
10067
10068/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10069/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10070/// SDValue.
10071SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10072 SelectionDAG &DAG) const {
10073 const unsigned NumHalfWords = 8;
10074 const unsigned BytesInVector = NumHalfWords * 2;
10075 // Check that the shuffle is on half-words.
10076 if (!isNByteElemShuffleMask(N, 2, 1))
10077 return SDValue();
10078
10079 bool IsLE = Subtarget.isLittleEndian();
10080 SDLoc dl(N);
10081 SDValue V1 = N->getOperand(0);
10082 SDValue V2 = N->getOperand(1);
10083 unsigned ShiftElts = 0, InsertAtByte = 0;
10084 bool Swap = false;
10085
10086 // Shifts required to get the half-word we want at element 3.
10087 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10088 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10089
10090 uint32_t Mask = 0;
10091 uint32_t OriginalOrderLow = 0x1234567;
10092 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10093 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10094 // 32-bit space, only need 4-bit nibbles per element.
10095 for (unsigned i = 0; i < NumHalfWords; ++i) {
10096 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10097 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10098 }
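// For example, the identity half-word order <0,1,2,3,4,5,6,7> packs to
// 0x01234567 (OriginalOrderLow), and a mask taking every half-word from V2,
// <8,9,...,15>, packs to 0x89ABCDEF (OriginalOrderHigh).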
10099
10100 // For each mask element, find out if we're just inserting something
10101 // from V2 into V1 or vice versa. Possible permutations inserting an element
10102 // from V2 into V1:
10103 // X, 1, 2, 3, 4, 5, 6, 7
10104 // 0, X, 2, 3, 4, 5, 6, 7
10105 // 0, 1, X, 3, 4, 5, 6, 7
10106 // 0, 1, 2, X, 4, 5, 6, 7
10107 // 0, 1, 2, 3, X, 5, 6, 7
10108 // 0, 1, 2, 3, 4, X, 6, 7
10109 // 0, 1, 2, 3, 4, 5, X, 7
10110 // 0, 1, 2, 3, 4, 5, 6, X
10111 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10112
10113 bool FoundCandidate = false;
10114 // Go through the mask of half-words to find an element that's being moved
10115 // from one vector to the other.
10116 for (unsigned i = 0; i < NumHalfWords; ++i) {
10117 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10118 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10119 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10120 uint32_t TargetOrder = 0x0;
10121
10122 // If both vector operands for the shuffle are the same vector, the mask
10123 // will contain only elements from the first one and the second one will be
10124 // undef.
10125 if (V2.isUndef()) {
10126 ShiftElts = 0;
10127 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10128 TargetOrder = OriginalOrderLow;
10129 Swap = false;
10130 // Skip if this is not the correct element or if the mask of the other
10131 // elements doesn't match our expected order.
10132 if (MaskOneElt == VINSERTHSrcElem &&
10133 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10134 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10135 FoundCandidate = true;
10136 break;
10137 }
10138 } else { // If both operands are defined.
10139 // Target order is [8,15] if the current mask is between [0,7].
10140 TargetOrder =
10141 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10142 // Skip if the mask of the other elements doesn't match our expected order.
10143 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10144 // We only need the last 3 bits for the number of shifts.
10145 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10146 : BigEndianShifts[MaskOneElt & 0x7];
10147 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10148 Swap = MaskOneElt < NumHalfWords;
10149 FoundCandidate = true;
10150 break;
10151 }
10152 }
10153 }
10154
10155 if (!FoundCandidate)
10156 return SDValue();
10157
10158 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10159 // optionally with VECSHL if shift is required.
10160 if (Swap)
10161 std::swap(V1, V2);
10162 if (V2.isUndef())
10163 V2 = V1;
10164 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10165 if (ShiftElts) {
10166 // Double ShiftElts because we're left shifting on v16i8 type.
10167 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10168 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10169 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10170 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10171 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10172 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10173 }
10174 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10175 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10176 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10177 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10178}
10179
10180/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10181/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10182/// return the default SDValue.
10183SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10184 SelectionDAG &DAG) const {
10185 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10186 // to v16i8. Peek through the bitcasts to get the actual operands.
10187 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10188 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10189
10190 auto ShuffleMask = SVN->getMask();
10191 SDValue VecShuffle(SVN, 0);
10192 SDLoc DL(SVN);
10193
10194 // Check that we have a four byte shuffle.
10195 if (!isNByteElemShuffleMask(SVN, 4, 1))
10196 return SDValue();
10197
10198 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10199 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10200 std::swap(LHS, RHS);
10201 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10202 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10203 if (!CommutedSV)
10204 return SDValue();
10205 ShuffleMask = CommutedSV->getMask();
10206 }
10207
10208 // Ensure that the RHS is a vector of constants.
10209 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10210 if (!BVN)
10211 return SDValue();
10212
10213 // Check if RHS is a splat of 4-bytes (or smaller).
10214 APInt APSplatValue, APSplatUndef;
10215 unsigned SplatBitSize;
10216 bool HasAnyUndefs;
10217 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10218 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10219 SplatBitSize > 32)
10220 return SDValue();
10221
10222 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10223 // The instruction splats a constant C into two words of the source vector
10224 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10225 // Thus we check that the shuffle mask is the equivalent of
10226 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10227 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10228 // within each word are consecutive, so we only need to check the first byte.
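// For example (illustrative little-endian mask): <0,1,2,3, 16,17,18,19,
// 8,9,10,11, 20,21,22,23> keeps words 0 and 2 of the LHS and takes words 1
// and 3 from the constant RHS, so Index is set to 0 below.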
10229 SDValue Index;
10230 bool IsLE = Subtarget.isLittleEndian();
10231 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10232 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10233 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10234 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10235 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10236 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10237 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10238 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10239 else
10240 return SDValue();
10241
10242 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10243 // for XXSPLTI32DX.
10244 unsigned SplatVal = APSplatValue.getZExtValue();
10245 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10246 SplatVal |= (SplatVal << SplatBitSize);
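// E.g. an 8-bit splat value of 0xAB widens to 0xABAB and then 0xABABABAB,
// and a 16-bit splat of 0x1234 widens to 0x12341234.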
10247
10248 SDValue SplatNode = DAG.getNode(
10249 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10250 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10251 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10252}
10253
10254/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10255/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10256/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10257/// i.e (or (shl x, C1), (srl x, 128-C1)).
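/// For example, a rotate amount of 16 produces the byte shuffle mask
/// <2,3,...,15,0,1>, while a rotate amount of 5 falls back to the scalar form
/// (or (shl x, 5), (srl x, 123)).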
10258SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10259 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10260 assert(Op.getValueType() == MVT::v1i128 &&
10261 "Only set v1i128 as custom, other type shouldn't reach here!");
10262 SDLoc dl(Op);
10263 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10264 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10265 unsigned SHLAmt = N1.getConstantOperandVal(0);
10266 if (SHLAmt % 8 == 0) {
10267 std::array<int, 16> Mask;
10268 std::iota(Mask.begin(), Mask.end(), 0);
10269 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10270 if (SDValue Shuffle =
10271 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10272 DAG.getUNDEF(MVT::v16i8), Mask))
10273 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10274 }
10275 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10276 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10277 DAG.getConstant(SHLAmt, dl, MVT::i32));
10278 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10279 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10280 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10281 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10282}
10283
10284/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10285/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10286/// return the code it can be lowered into. Worst case, it can always be
10287/// lowered into a vperm.
10288SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10289 SelectionDAG &DAG) const {
10290 SDLoc dl(Op);
10291 SDValue V1 = Op.getOperand(0);
10292 SDValue V2 = Op.getOperand(1);
10293 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10294
10295 // Any nodes that were combined in the target-independent combiner prior
10296 // to vector legalization will not be sent to the target combine. Try to
10297 // combine it here.
10298 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10299 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10300 return NewShuffle;
10301 Op = NewShuffle;
10302 SVOp = cast<ShuffleVectorSDNode>(Op);
10303 V1 = Op.getOperand(0);
10304 V2 = Op.getOperand(1);
10305 }
10306 EVT VT = Op.getValueType();
10307 bool isLittleEndian = Subtarget.isLittleEndian();
10308
10309 unsigned ShiftElts, InsertAtByte;
10310 bool Swap = false;
10311
10312 // If this is a load-and-splat, we can do that with a single instruction
10313 // in some cases. However if the load has multiple uses, we don't want to
10314 // combine it because that will just produce multiple loads.
10315 bool IsPermutedLoad = false;
10316 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10317 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10318 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10319 InputLoad->hasOneUse()) {
10320 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10321 int SplatIdx =
10322 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10323
10324 // The splat index for permuted loads will be in the left half of the vector
10325 // which is strictly wider than the loaded value by 8 bytes. So we need to
10326 // adjust the splat index to point to the correct address in memory.
10327 if (IsPermutedLoad) {
10328 assert((isLittleEndian || IsFourByte) &&
10329 "Unexpected size for permuted load on big endian target");
10330 SplatIdx += IsFourByte ? 2 : 1;
10331 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10332 "Splat of a value outside of the loaded memory");
10333 }
10334
10335 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10336 // For 4-byte load-and-splat, we need Power9.
10337 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10338 uint64_t Offset = 0;
10339 if (IsFourByte)
10340 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10341 else
10342 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10343
10344 // If the width of the load is the same as the width of the splat,
10345 // loading with an offset would load the wrong memory.
10346 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10347 Offset = 0;
10348
10349 SDValue BasePtr = LD->getBasePtr();
10350 if (Offset != 0)
10351 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10352 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10353 SDValue Ops[] = {
10354 LD->getChain(), // Chain
10355 BasePtr, // BasePtr
10356 DAG.getValueType(Op.getValueType()) // VT
10357 };
10358 SDVTList VTL =
10359 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10360 SDValue LdSplt =
10361 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10362 Ops, LD->getMemoryVT(), LD->getMemOperand());
10363 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10364 if (LdSplt.getValueType() != SVOp->getValueType(0))
10365 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10366 return LdSplt;
10367 }
10368 }
10369
10370 // All v2i64 and v2f64 shuffles are legal
10371 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10372 return Op;
10373
10374 if (Subtarget.hasP9Vector() &&
10375 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10376 isLittleEndian)) {
10377 if (V2.isUndef())
10378 V2 = V1;
10379 else if (Swap)
10380 std::swap(V1, V2);
10381 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10382 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10383 if (ShiftElts) {
10384 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10385 DAG.getConstant(ShiftElts, dl, MVT::i32));
10386 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10387 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10388 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10389 }
10390 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10391 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10392 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10393 }
10394
10395 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10396 SDValue SplatInsertNode;
10397 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10398 return SplatInsertNode;
10399 }
10400
10401 if (Subtarget.hasP9Altivec()) {
10402 SDValue NewISDNode;
10403 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10404 return NewISDNode;
10405
10406 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10407 return NewISDNode;
10408 }
10409
10410 if (Subtarget.hasVSX() &&
10411 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10412 if (Swap)
10413 std::swap(V1, V2);
10414 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10415 SDValue Conv2 =
10416 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10417
10418 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10419 DAG.getConstant(ShiftElts, dl, MVT::i32));
10420 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10421 }
10422
10423 if (Subtarget.hasVSX() &&
10424 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10425 if (Swap)
10426 std::swap(V1, V2);
10427 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10428 SDValue Conv2 =
10429 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10430
10431 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10432 DAG.getConstant(ShiftElts, dl, MVT::i32));
10433 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10434 }
10435
10436 if (Subtarget.hasP9Vector()) {
10437 if (PPC::isXXBRHShuffleMask(SVOp)) {
10438 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10439 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10440 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10441 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10442 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10443 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10444 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10445 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10446 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10447 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10448 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10449 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10450 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10451 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10452 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10453 }
10454 }
10455
10456 if (Subtarget.hasVSX()) {
10457 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10458 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10459
10460 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10461 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10462 DAG.getConstant(SplatIdx, dl, MVT::i32));
10463 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10464 }
10465
10466 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10467 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10468 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10469 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10470 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10471 }
10472 }
10473
10474 // Cases that are handled by instructions that take permute immediates
10475 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10476 // selected by the instruction selector.
10477 if (V2.isUndef()) {
10478 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10479 PPC::isSplatShuffleMask(SVOp, 2) ||
10480 PPC::isSplatShuffleMask(SVOp, 4) ||
10481 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10482 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10483 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10484 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10485 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10486 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10487 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10488 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10489 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10490 (Subtarget.hasP8Altivec() && (
10491 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10492 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10493 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10494 return Op;
10495 }
10496 }
10497
10498 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10499 // and produce a fixed permutation. If any of these match, do not lower to
10500 // VPERM.
10501 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10502 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10503 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10504 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10505 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10506 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10507 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10508 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10509 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10510 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10511 (Subtarget.hasP8Altivec() && (
10512 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10513 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10514 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10515 return Op;
10516
10517 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10518 // perfect shuffle table to emit an optimal matching sequence.
10519 ArrayRef<int> PermMask = SVOp->getMask();
10520
10521 if (!DisablePerfectShuffle && !isLittleEndian) {
10522 unsigned PFIndexes[4];
10523 bool isFourElementShuffle = true;
10524 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10525 ++i) { // Element number
10526 unsigned EltNo = 8; // Start out undef.
10527 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10528 if (PermMask[i * 4 + j] < 0)
10529 continue; // Undef, ignore it.
10530
10531 unsigned ByteSource = PermMask[i * 4 + j];
10532 if ((ByteSource & 3) != j) {
10533 isFourElementShuffle = false;
10534 break;
10535 }
10536
10537 if (EltNo == 8) {
10538 EltNo = ByteSource / 4;
10539 } else if (EltNo != ByteSource / 4) {
10540 isFourElementShuffle = false;
10541 break;
10542 }
10543 }
10544 PFIndexes[i] = EltNo;
10545 }
10546
10547 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10548 // perfect shuffle vector to determine if it is cost effective to do this as
10549 // discrete instructions, or whether we should use a vperm.
10550 // For now, we skip this for little endian until such time as we have a
10551 // little-endian perfect shuffle table.
10552 if (isFourElementShuffle) {
10553 // Compute the index in the perfect shuffle table.
10554 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10555 PFIndexes[2] * 9 + PFIndexes[3];
10556
10557 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10558 unsigned Cost = (PFEntry >> 30);
10559
10560 // Determining when to avoid vperm is tricky. Many things affect the cost
10561 // of vperm, particularly how many times the perm mask needs to be
10562 // computed. For example, if the perm mask can be hoisted out of a loop or
10563 // is already used (perhaps because there are multiple permutes with the
10564 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10565 // permute mask out of the loop requires an extra register.
10566 //
10567 // As a compromise, we only emit discrete instructions if the shuffle can
10568 // be generated in 3 or fewer operations. When we have loop information
10569 // available, if this block is within a loop, we should avoid using vperm
10570 // for 3-operation perms and use a constant pool load instead.
10571 if (Cost < 3)
10572 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10573 }
10574 }
10575
10576 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10577 // vector that will get spilled to the constant pool.
10578 if (V2.isUndef()) V2 = V1;
10579
10580 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10581}
10582
10583SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10584 ArrayRef<int> PermMask, EVT VT,
10585 SDValue V1, SDValue V2) const {
10586 unsigned Opcode = PPCISD::VPERM;
10587 EVT ValType = V1.getValueType();
10588 SDLoc dl(Op);
10589 bool NeedSwap = false;
10590 bool isLittleEndian = Subtarget.isLittleEndian();
10591 bool isPPC64 = Subtarget.isPPC64();
10592
10593 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10594 (V1->hasOneUse() || V2->hasOneUse())) {
10595 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10596 "XXPERM instead\n");
10597 Opcode = PPCISD::XXPERM;
10598
10599 // The second input to XXPERM is also an output, so if the second input has
10600 // multiple uses then a copy is necessary. As a result, we want the
10601 // single-use operand to be used as the second input to prevent copying.
10602 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10603 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10604 std::swap(V1, V2);
10605 NeedSwap = !NeedSwap;
10606 }
10607 }
10608
10609 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10610 // that it is in input element units, not in bytes. Convert now.
10611
10612 // For little endian, the order of the input vectors is reversed, and
10613 // the permutation mask is complemented with respect to 31. This is
10614 // necessary to produce proper semantics with the big-endian-based vperm
10615 // instruction.
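// For example, a source byte index of 0 becomes a permute-control entry of 31
// on little-endian targets and 0 on big-endian targets.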
10616 EVT EltVT = V1.getValueType().getVectorElementType();
10617 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10618
10619 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10620 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10621
10622 /*
10623 Vectors will be appended like so: [ V1 | V2 ]
10624 XXSWAPD on V1:
10625 [ A | B | C | D ] -> [ C | D | A | B ]
10626 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10627 i.e. index of A, B += 8, and index of C, D -= 8.
10628 XXSWAPD on V2:
10629 [ E | F | G | H ] -> [ G | H | E | F ]
10630 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10631 i.e. index of E, F += 8, index of G, H -= 8
10632 Swap V1 and V2:
10633 [ V1 | V2 ] -> [ V2 | V1 ]
10634 0-15 16-31 0-15 16-31
10635 i.e. index of V1 += 16, index of V2 -= 16
10636 */
10637
10638 SmallVector<SDValue, 16> ResultMask;
10639 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10640 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10641
10642 if (V1HasXXSWAPD) {
10643 if (SrcElt < 8)
10644 SrcElt += 8;
10645 else if (SrcElt < 16)
10646 SrcElt -= 8;
10647 }
10648 if (V2HasXXSWAPD) {
10649 if (SrcElt > 23)
10650 SrcElt -= 8;
10651 else if (SrcElt > 15)
10652 SrcElt += 8;
10653 }
10654 if (NeedSwap) {
10655 if (SrcElt < 16)
10656 SrcElt += 16;
10657 else
10658 SrcElt -= 16;
10659 }
10660 for (unsigned j = 0; j != BytesPerElement; ++j)
10661 if (isLittleEndian)
10662 ResultMask.push_back(
10663 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10664 else
10665 ResultMask.push_back(
10666 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10667 }
10668
10669 if (V1HasXXSWAPD) {
10670 dl = SDLoc(V1->getOperand(0));
10671 V1 = V1->getOperand(0)->getOperand(1);
10672 }
10673 if (V2HasXXSWAPD) {
10674 dl = SDLoc(V2->getOperand(0));
10675 V2 = V2->getOperand(0)->getOperand(1);
10676 }
10677
10678 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10679 if (ValType != MVT::v2f64)
10680 V1 = DAG.getBitcast(MVT::v2f64, V1);
10681 if (V2.getValueType() != MVT::v2f64)
10682 V2 = DAG.getBitcast(MVT::v2f64, V2);
10683 }
10684
10685 ShufflesHandledWithVPERM++;
10686 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10687 LLVM_DEBUG({
10688 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10689 if (Opcode == PPCISD::XXPERM) {
10690 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10691 } else {
10692 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10693 }
10694 SVOp->dump();
10695 dbgs() << "With the following permute control vector:\n";
10696 VPermMask.dump();
10697 });
10698
10699 if (Opcode == PPCISD::XXPERM)
10700 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10701
10702 // Only need to swap the operands for LE;
10703 // the mask was already calculated accordingly.
10704 if (isLittleEndian)
10705 std::swap(V1, V2);
10706
10707 SDValue VPERMNode =
10708 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10709
10710 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10711 return VPERMNode;
10712}
10713
10714/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10715/// vector comparison. If it is, return true and fill in Opc/isDot with
10716/// information about the intrinsic.
10717static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10718 bool &isDot, const PPCSubtarget &Subtarget) {
10719 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10720 CompareOpc = -1;
10721 isDot = false;
10722 switch (IntrinsicID) {
10723 default:
10724 return false;
10725 // Comparison predicates.
10726 case Intrinsic::ppc_altivec_vcmpbfp_p:
10727 CompareOpc = 966;
10728 isDot = true;
10729 break;
10730 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10731 CompareOpc = 198;
10732 isDot = true;
10733 break;
10734 case Intrinsic::ppc_altivec_vcmpequb_p:
10735 CompareOpc = 6;
10736 isDot = true;
10737 break;
10738 case Intrinsic::ppc_altivec_vcmpequh_p:
10739 CompareOpc = 70;
10740 isDot = true;
10741 break;
10742 case Intrinsic::ppc_altivec_vcmpequw_p:
10743 CompareOpc = 134;
10744 isDot = true;
10745 break;
10746 case Intrinsic::ppc_altivec_vcmpequd_p:
10747 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10748 CompareOpc = 199;
10749 isDot = true;
10750 } else
10751 return false;
10752 break;
10753 case Intrinsic::ppc_altivec_vcmpneb_p:
10754 case Intrinsic::ppc_altivec_vcmpneh_p:
10755 case Intrinsic::ppc_altivec_vcmpnew_p:
10756 case Intrinsic::ppc_altivec_vcmpnezb_p:
10757 case Intrinsic::ppc_altivec_vcmpnezh_p:
10758 case Intrinsic::ppc_altivec_vcmpnezw_p:
10759 if (Subtarget.hasP9Altivec()) {
10760 switch (IntrinsicID) {
10761 default:
10762 llvm_unreachable("Unknown comparison intrinsic.");
10763 case Intrinsic::ppc_altivec_vcmpneb_p:
10764 CompareOpc = 7;
10765 break;
10766 case Intrinsic::ppc_altivec_vcmpneh_p:
10767 CompareOpc = 71;
10768 break;
10769 case Intrinsic::ppc_altivec_vcmpnew_p:
10770 CompareOpc = 135;
10771 break;
10772 case Intrinsic::ppc_altivec_vcmpnezb_p:
10773 CompareOpc = 263;
10774 break;
10775 case Intrinsic::ppc_altivec_vcmpnezh_p:
10776 CompareOpc = 327;
10777 break;
10778 case Intrinsic::ppc_altivec_vcmpnezw_p:
10779 CompareOpc = 391;
10780 break;
10781 }
10782 isDot = true;
10783 } else
10784 return false;
10785 break;
10786 case Intrinsic::ppc_altivec_vcmpgefp_p:
10787 CompareOpc = 454;
10788 isDot = true;
10789 break;
10790 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10791 CompareOpc = 710;
10792 isDot = true;
10793 break;
10794 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10795 CompareOpc = 774;
10796 isDot = true;
10797 break;
10798 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10799 CompareOpc = 838;
10800 isDot = true;
10801 break;
10802 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10803 CompareOpc = 902;
10804 isDot = true;
10805 break;
10806 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10807 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10808 CompareOpc = 967;
10809 isDot = true;
10810 } else
10811 return false;
10812 break;
10813 case Intrinsic::ppc_altivec_vcmpgtub_p:
10814 CompareOpc = 518;
10815 isDot = true;
10816 break;
10817 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10818 CompareOpc = 582;
10819 isDot = true;
10820 break;
10821 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10822 CompareOpc = 646;
10823 isDot = true;
10824 break;
10825 case Intrinsic::ppc_altivec_vcmpgtud_p:
10826 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10827 CompareOpc = 711;
10828 isDot = true;
10829 } else
10830 return false;
10831 break;
10832
10833 case Intrinsic::ppc_altivec_vcmpequq:
10834 case Intrinsic::ppc_altivec_vcmpgtsq:
10835 case Intrinsic::ppc_altivec_vcmpgtuq:
10836 if (!Subtarget.isISA3_1())
10837 return false;
10838 switch (IntrinsicID) {
10839 default:
10840 llvm_unreachable("Unknown comparison intrinsic.");
10841 case Intrinsic::ppc_altivec_vcmpequq:
10842 CompareOpc = 455;
10843 break;
10844 case Intrinsic::ppc_altivec_vcmpgtsq:
10845 CompareOpc = 903;
10846 break;
10847 case Intrinsic::ppc_altivec_vcmpgtuq:
10848 CompareOpc = 647;
10849 break;
10850 }
10851 break;
10852
10853 // VSX predicate comparisons use the same infrastructure
10854 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10855 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10856 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10857 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10858 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10859 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10860 if (Subtarget.hasVSX()) {
10861 switch (IntrinsicID) {
10862 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10863 CompareOpc = 99;
10864 break;
10865 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10866 CompareOpc = 115;
10867 break;
10868 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10869 CompareOpc = 107;
10870 break;
10871 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10872 CompareOpc = 67;
10873 break;
10874 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10875 CompareOpc = 83;
10876 break;
10877 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10878 CompareOpc = 75;
10879 break;
10880 }
10881 isDot = true;
10882 } else
10883 return false;
10884 break;
10885
10886 // Normal Comparisons.
10887 case Intrinsic::ppc_altivec_vcmpbfp:
10888 CompareOpc = 966;
10889 break;
10890 case Intrinsic::ppc_altivec_vcmpeqfp:
10891 CompareOpc = 198;
10892 break;
10893 case Intrinsic::ppc_altivec_vcmpequb:
10894 CompareOpc = 6;
10895 break;
10896 case Intrinsic::ppc_altivec_vcmpequh:
10897 CompareOpc = 70;
10898 break;
10899 case Intrinsic::ppc_altivec_vcmpequw:
10900 CompareOpc = 134;
10901 break;
10902 case Intrinsic::ppc_altivec_vcmpequd:
10903 if (Subtarget.hasP8Altivec())
10904 CompareOpc = 199;
10905 else
10906 return false;
10907 break;
10908 case Intrinsic::ppc_altivec_vcmpneb:
10909 case Intrinsic::ppc_altivec_vcmpneh:
10910 case Intrinsic::ppc_altivec_vcmpnew:
10911 case Intrinsic::ppc_altivec_vcmpnezb:
10912 case Intrinsic::ppc_altivec_vcmpnezh:
10913 case Intrinsic::ppc_altivec_vcmpnezw:
10914 if (Subtarget.hasP9Altivec())
10915 switch (IntrinsicID) {
10916 default:
10917 llvm_unreachable("Unknown comparison intrinsic.");
10918 case Intrinsic::ppc_altivec_vcmpneb:
10919 CompareOpc = 7;
10920 break;
10921 case Intrinsic::ppc_altivec_vcmpneh:
10922 CompareOpc = 71;
10923 break;
10924 case Intrinsic::ppc_altivec_vcmpnew:
10925 CompareOpc = 135;
10926 break;
10927 case Intrinsic::ppc_altivec_vcmpnezb:
10928 CompareOpc = 263;
10929 break;
10930 case Intrinsic::ppc_altivec_vcmpnezh:
10931 CompareOpc = 327;
10932 break;
10933 case Intrinsic::ppc_altivec_vcmpnezw:
10934 CompareOpc = 391;
10935 break;
10936 }
10937 else
10938 return false;
10939 break;
10940 case Intrinsic::ppc_altivec_vcmpgefp:
10941 CompareOpc = 454;
10942 break;
10943 case Intrinsic::ppc_altivec_vcmpgtfp:
10944 CompareOpc = 710;
10945 break;
10946 case Intrinsic::ppc_altivec_vcmpgtsb:
10947 CompareOpc = 774;
10948 break;
10949 case Intrinsic::ppc_altivec_vcmpgtsh:
10950 CompareOpc = 838;
10951 break;
10952 case Intrinsic::ppc_altivec_vcmpgtsw:
10953 CompareOpc = 902;
10954 break;
10955 case Intrinsic::ppc_altivec_vcmpgtsd:
10956 if (Subtarget.hasP8Altivec())
10957 CompareOpc = 967;
10958 else
10959 return false;
10960 break;
10961 case Intrinsic::ppc_altivec_vcmpgtub:
10962 CompareOpc = 518;
10963 break;
10964 case Intrinsic::ppc_altivec_vcmpgtuh:
10965 CompareOpc = 582;
10966 break;
10967 case Intrinsic::ppc_altivec_vcmpgtuw:
10968 CompareOpc = 646;
10969 break;
10970 case Intrinsic::ppc_altivec_vcmpgtud:
10971 if (Subtarget.hasP8Altivec())
10972 CompareOpc = 711;
10973 else
10974 return false;
10975 break;
10976 case Intrinsic::ppc_altivec_vcmpequq_p:
10977 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10978 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10979 if (!Subtarget.isISA3_1())
10980 return false;
10981 switch (IntrinsicID) {
10982 default:
10983 llvm_unreachable("Unknown comparison intrinsic.");
10984 case Intrinsic::ppc_altivec_vcmpequq_p:
10985 CompareOpc = 455;
10986 break;
10987 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10988 CompareOpc = 903;
10989 break;
10990 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10991 CompareOpc = 647;
10992 break;
10993 }
10994 isDot = true;
10995 break;
10996 }
10997 return true;
10998}
10999
11000/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11001/// lower, do it, otherwise return null.
11002SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11003 SelectionDAG &DAG) const {
11004 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11005
11006 SDLoc dl(Op);
11007
11008 switch (IntrinsicID) {
11009 case Intrinsic::thread_pointer:
11010 // Reads the thread pointer register, used for __builtin_thread_pointer.
11011 if (Subtarget.isPPC64())
11012 return DAG.getRegister(PPC::X13, MVT::i64);
11013 return DAG.getRegister(PPC::R2, MVT::i32);
11014
11015 case Intrinsic::ppc_rldimi: {
11016 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11017 SDValue Src = Op.getOperand(1);
11018 APInt Mask = Op.getConstantOperandAPInt(4);
11019 if (Mask.isZero())
11020 return Op.getOperand(2);
11021 if (Mask.isAllOnes())
11022 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11023 uint64_t SH = Op.getConstantOperandVal(3);
11024 unsigned MB = 0, ME = 0;
11025 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11026 report_fatal_error("invalid rldimi mask!");
11027 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
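// Example: SH = 8 and Mask = 0x00FFFF0000000000 give MB = 8, ME = 23. Since
// ME < 63-SH, Src is pre-rotated left by ME+SH+1 = 32 and the RLDIMI emitted
// below rotates by 63-ME = 40 more: 32+40 = 72 = 8 (mod 64), i.e. the requested
// shift, while the instruction's implicit mask end (63 minus its shift) is ME.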
11028 if (ME < 63 - SH) {
11029 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11030 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11031 } else if (ME > 63 - SH) {
11032 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11033 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11034 }
11035 return SDValue(
11036 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11037 {Op.getOperand(2), Src,
11038 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11039 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11040 0);
11041 }
11042
11043 case Intrinsic::ppc_rlwimi: {
11044 APInt Mask = Op.getConstantOperandAPInt(4);
11045 if (Mask.isZero())
11046 return Op.getOperand(2);
11047 if (Mask.isAllOnes())
11048 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11049 Op.getOperand(3));
11050 unsigned MB = 0, ME = 0;
11051 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11052 report_fatal_error("invalid rlwimi mask!");
11053 return SDValue(DAG.getMachineNode(
11054 PPC::RLWIMI, dl, MVT::i32,
11055 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11056 DAG.getTargetConstant(MB, dl, MVT::i32),
11057 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11058 0);
11059 }
11060
11061 case Intrinsic::ppc_rlwnm: {
11062 if (Op.getConstantOperandVal(3) == 0)
11063 return DAG.getConstant(0, dl, MVT::i32);
11064 unsigned MB = 0, ME = 0;
11065 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11066 report_fatal_error("invalid rlwnm mask!");
11067 return SDValue(
11068 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11069 {Op.getOperand(1), Op.getOperand(2),
11070 DAG.getTargetConstant(MB, dl, MVT::i32),
11071 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11072 0);
11073 }
11074
11075 case Intrinsic::ppc_mma_disassemble_acc: {
11076 if (Subtarget.isISAFuture()) {
11077 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11078 SDValue WideVec =
11079 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11080 Op.getOperand(1)),
11081 0);
11082 SmallVector<SDValue, 4> RetOps;
11083 SDValue Value = SDValue(WideVec.getNode(), 0);
11084 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11085
11086 SDValue Extract;
11087 Extract = DAG.getNode(
11088 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11089 Subtarget.isLittleEndian() ? Value2 : Value,
11090 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11091 dl, getPointerTy(DAG.getDataLayout())));
11092 RetOps.push_back(Extract);
11093 Extract = DAG.getNode(
11094 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11095 Subtarget.isLittleEndian() ? Value2 : Value,
11096 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11097 dl, getPointerTy(DAG.getDataLayout())));
11098 RetOps.push_back(Extract);
11099 Extract = DAG.getNode(
11100 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11101 Subtarget.isLittleEndian() ? Value : Value2,
11102 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11103 dl, getPointerTy(DAG.getDataLayout())));
11104 RetOps.push_back(Extract);
11105 Extract = DAG.getNode(
11106 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11107 Subtarget.isLittleEndian() ? Value : Value2,
11108 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11109 dl, getPointerTy(DAG.getDataLayout())));
11110 RetOps.push_back(Extract);
11111 return DAG.getMergeValues(RetOps, dl);
11112 }
11113 [[fallthrough]];
11114 }
11115 case Intrinsic::ppc_vsx_disassemble_pair: {
11116 int NumVecs = 2;
11117 SDValue WideVec = Op.getOperand(1);
11118 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11119 NumVecs = 4;
11120 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11121 }
11122 SmallVector<SDValue, 4> RetOps;
11123 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11124 SDValue Extract = DAG.getNode(
11125 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11126 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11127 : VecNo,
11128 dl, getPointerTy(DAG.getDataLayout())));
11129 RetOps.push_back(Extract);
11130 }
11131 return DAG.getMergeValues(RetOps, dl);
11132 }
11133
11134 case Intrinsic::ppc_mma_xxmfacc:
11135 case Intrinsic::ppc_mma_xxmtacc: {
11136 // Allow pre-isa-future subtargets to lower as normal.
11137 if (!Subtarget.isISAFuture())
11138 return SDValue();
11139 // The intrinsics for xxmtacc and xxmfacc take one argument of
11140 // type v512i1. For future CPUs the corresponding wacc instruction
11141 // dmxx[inst|extf]dmr512 is always generated for type v512i1, so there
11142 // is no need to produce the xxm[t|f]acc.
11143 SDValue WideVec = Op.getOperand(1);
11144 DAG.ReplaceAllUsesWith(Op, WideVec);
11145 return SDValue();
11146 }
11147
11148 case Intrinsic::ppc_unpack_longdouble: {
11149 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11150 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11151 "Argument of long double unpack must be 0 or 1!");
11152 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11153 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11154 Idx->getValueType(0)));
11155 }
11156
11157 case Intrinsic::ppc_compare_exp_lt:
11158 case Intrinsic::ppc_compare_exp_gt:
11159 case Intrinsic::ppc_compare_exp_eq:
11160 case Intrinsic::ppc_compare_exp_uo: {
11161 unsigned Pred;
11162 switch (IntrinsicID) {
11163 case Intrinsic::ppc_compare_exp_lt:
11164 Pred = PPC::PRED_LT;
11165 break;
11166 case Intrinsic::ppc_compare_exp_gt:
11167 Pred = PPC::PRED_GT;
11168 break;
11169 case Intrinsic::ppc_compare_exp_eq:
11170 Pred = PPC::PRED_EQ;
11171 break;
11172 case Intrinsic::ppc_compare_exp_uo:
11173 Pred = PPC::PRED_UN;
11174 break;
11175 }
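// xscmpexpdp compares only the exponent fields of the two inputs and records
// lt/gt/eq/unordered in a CR field; the SELECT_CC_I4 below then materializes
// the requested predicate bit as an i32 0/1 result.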
11176 return SDValue(
11177 DAG.getMachineNode(
11178 PPC::SELECT_CC_I4, dl, MVT::i32,
11179 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11180 Op.getOperand(1), Op.getOperand(2)),
11181 0),
11182 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11183 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11184 0);
11185 }
11186 case Intrinsic::ppc_test_data_class: {
11187 EVT OpVT = Op.getOperand(1).getValueType();
11188 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11189 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11190 : PPC::XSTSTDCSP);
11191 return SDValue(
11192 DAG.getMachineNode(
11193 PPC::SELECT_CC_I4, dl, MVT::i32,
11194 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11195 Op.getOperand(1)),
11196 0),
11197 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11198 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11199 0);
11200 }
11201 case Intrinsic::ppc_fnmsub: {
11202 EVT VT = Op.getOperand(1).getValueType();
11203 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11204 return DAG.getNode(
11205 ISD::FNEG, dl, VT,
11206 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11207 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11208 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11209 Op.getOperand(2), Op.getOperand(3));
11210 }
11211 case Intrinsic::ppc_convert_f128_to_ppcf128:
11212 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11213 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11214 ? RTLIB::CONVERT_PPCF128_F128
11215 : RTLIB::CONVERT_F128_PPCF128;
11216 MakeLibCallOptions CallOptions;
11217 std::pair<SDValue, SDValue> Result =
11218 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11219 dl, SDValue());
11220 return Result.first;
11221 }
11222 case Intrinsic::ppc_maxfe:
11223 case Intrinsic::ppc_maxfl:
11224 case Intrinsic::ppc_maxfs:
11225 case Intrinsic::ppc_minfe:
11226 case Intrinsic::ppc_minfl:
11227 case Intrinsic::ppc_minfs: {
11228 EVT VT = Op.getValueType();
11229 assert(
11230 all_of(Op->ops().drop_front(4),
11231 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11232 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11233 (void)VT;
11234 ISD::CondCode CC = ISD::SETGT;
11235 if (IntrinsicID == Intrinsic::ppc_minfe ||
11236 IntrinsicID == Intrinsic::ppc_minfl ||
11237 IntrinsicID == Intrinsic::ppc_minfs)
11238 CC = ISD::SETLT;
11239 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11240 SDValue Res = Op.getOperand(I);
11241 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11242 Res =
11243 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11244 }
11245 return Res;
11246 }
11247 }
11248
11249 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11250 // opcode number of the comparison.
11251 int CompareOpc;
11252 bool isDot;
11253 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11254 return SDValue(); // Don't custom lower most intrinsics.
11255
11256 // If this is a non-dot comparison, make the VCMP node and we are done.
11257 if (!isDot) {
11258 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11259 Op.getOperand(1), Op.getOperand(2),
11260 DAG.getConstant(CompareOpc, dl, MVT::i32));
11261 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11262 }
11263
11264 // Create the PPCISD altivec 'dot' comparison node.
11265 SDValue Ops[] = {
11266 Op.getOperand(2), // LHS
11267 Op.getOperand(3), // RHS
11268 DAG.getConstant(CompareOpc, dl, MVT::i32)
11269 };
11270 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11271 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11272
11273 // Unpack the result based on how the target uses it.
11274 unsigned BitNo; // Bit # of CR6.
11275 bool InvertBit; // Invert result?
11276 unsigned Bitx;
11277 unsigned SetOp;
11278 switch (Op.getConstantOperandVal(1)) {
11279 default: // Can't happen, don't crash on invalid number though.
11280 case 0: // Return the value of the EQ bit of CR6.
11281 BitNo = 0;
11282 InvertBit = false;
11283 Bitx = PPC::sub_eq;
11284 SetOp = PPCISD::SETBC;
11285 break;
11286 case 1: // Return the inverted value of the EQ bit of CR6.
11287 BitNo = 0;
11288 InvertBit = true;
11289 Bitx = PPC::sub_eq;
11290 SetOp = PPCISD::SETBCR;
11291 break;
11292 case 2: // Return the value of the LT bit of CR6.
11293 BitNo = 2;
11294 InvertBit = false;
11295 Bitx = PPC::sub_lt;
11296 SetOp = PPCISD::SETBC;
11297 break;
11298 case 3: // Return the inverted value of the LT bit of CR6.
11299 BitNo = 2;
11300 InvertBit = true;
11301 Bitx = PPC::sub_lt;
11302 SetOp = PPCISD::SETBCR;
11303 break;
11304 }
11305
11306 SDValue GlueOp = CompNode.getValue(1);
11307 if (Subtarget.isISA3_1()) {
11308 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11309 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11310 SDValue CRBit =
11311 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11312 CR6Reg, SubRegIdx, GlueOp),
11313 0);
11314 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11315 }
11316
11317 // Now that we have the comparison, emit a copy from the CR to a GPR.
11318 // This is flagged to the above dot comparison.
11319 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11320 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11321
11322 // Shift the bit into the low position.
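// MFOCRF places the CR6 field in bits 7..4 of the result (LT, GT, EQ, SO from
// bit 7 down), so the EQ bit (BitNo == 0) lands at bit 5 and the LT bit
// (BitNo == 2) at bit 7; 8 - (3 - BitNo) is exactly that bit position.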
11323 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11324 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11325 // Isolate the bit.
11326 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11327 DAG.getConstant(1, dl, MVT::i32));
11328
11329 // If we are supposed to, toggle the bit.
11330 if (InvertBit)
11331 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11332 DAG.getConstant(1, dl, MVT::i32));
11333 return Flags;
11334}
11335
11336SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11337 SelectionDAG &DAG) const {
11338 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11339 // the beginning of the argument list.
11340 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11341 SDLoc DL(Op);
11342 switch (Op.getConstantOperandVal(ArgStart)) {
11343 case Intrinsic::ppc_cfence: {
11344 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11345 SDValue Val = Op.getOperand(ArgStart + 1);
11346 EVT Ty = Val.getValueType();
11347 if (Ty == MVT::i128) {
11348 // FIXME: Testing one of two paired registers is sufficient to guarantee
11349 // ordering?
11350 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11351 }
11352 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11353 return SDValue(
11354 DAG.getMachineNode(
11355 Opcode, DL, MVT::Other,
11356 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11357 Op.getOperand(0)),
11358 0);
11359 }
11360 default:
11361 break;
11362 }
11363 return SDValue();
11364}
11365
11366// Lower scalar BSWAP64 to xxbrd.
11367SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11368 SDLoc dl(Op);
11369 if (!Subtarget.isPPC64())
11370 return Op;
11371 // MTVSRDD
11372 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11373 Op.getOperand(0));
11374 // XXBRD
11375 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11376 // MFVSRD
11377 int VectorIndex = 0;
11378 if (Subtarget.isLittleEndian())
11379 VectorIndex = 1;
11380 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11381 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11382 return Op;
11383}
11384
11385// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11386// compared to a value that is atomically loaded (atomic loads zero-extend).
11387SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11388 SelectionDAG &DAG) const {
11389 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11390 "Expecting an atomic compare-and-swap here.");
11391 SDLoc dl(Op);
11392 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11393 EVT MemVT = AtomicNode->getMemoryVT();
11394 if (MemVT.getSizeInBits() >= 32)
11395 return Op;
11396
11397 SDValue CmpOp = Op.getOperand(2);
11398 // If this is already correctly zero-extended, leave it alone.
11399 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11400 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11401 return Op;
11402
11403 // Clear the high bits of the compare operand.
11404 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11405 SDValue NewCmpOp =
11406 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11407 DAG.getConstant(MaskVal, dl, MVT::i32));
11408
11409 // Replace the existing compare operand with the properly zero-extended one.
11410 SmallVector<SDValue, 4> Ops;
11411 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11412 Ops.push_back(AtomicNode->getOperand(i));
11413 Ops[2] = NewCmpOp;
11414 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11415 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11416 auto NodeTy =
11417 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11418 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11419}
11420
11421SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11422 SelectionDAG &DAG) const {
11423 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11424 EVT MemVT = N->getMemoryVT();
11425 assert(MemVT.getSimpleVT() == MVT::i128 &&
11426 "Expect quadword atomic operations");
11427 SDLoc dl(N);
11428 unsigned Opc = N->getOpcode();
11429 switch (Opc) {
11430 case ISD::ATOMIC_LOAD: {
11431 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11432 // lowered to ppc instructions by pattern matching instruction selector.
11433 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11434 SmallVector<SDValue, 4> Ops{
11435 N->getOperand(0),
11436 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11437 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11438 Ops.push_back(N->getOperand(I));
11439 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11440 Ops, MemVT, N->getMemOperand());
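// Reassemble the i128: zero-extend both 64-bit halves, shift the high half
// left by 64 and OR the two together.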
11441 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11442 SDValue ValHi =
11443 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11444 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11445 DAG.getConstant(64, dl, MVT::i32));
11446 SDValue Val =
11447 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11448 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11449 {Val, LoadedVal.getValue(2)});
11450 }
11451 case ISD::ATOMIC_STORE: {
11452 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11453 // lowered to ppc instructions by pattern matching instruction selector.
11454 SDVTList Tys = DAG.getVTList(MVT::Other);
11455 SmallVector<SDValue, 4> Ops{
11456 N->getOperand(0),
11457 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11458 SDValue Val = N->getOperand(1);
11459 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11460 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11461 DAG.getConstant(64, dl, MVT::i32));
11462 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11463 Ops.push_back(ValLo);
11464 Ops.push_back(ValHi);
11465 Ops.push_back(N->getOperand(2));
11466 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11467 N->getMemOperand());
11468 }
11469 default:
11470 llvm_unreachable("Unexpected atomic opcode");
11471 }
11472}
11473
11474 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11475 SelectionDAG &DAG,
11476 const PPCSubtarget &Subtarget) {
11477 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11478
11479 enum DataClassMask {
11480 DC_NAN = 1 << 6,
11481 DC_NEG_INF = 1 << 4,
11482 DC_POS_INF = 1 << 5,
11483 DC_NEG_ZERO = 1 << 2,
11484 DC_POS_ZERO = 1 << 3,
11485 DC_NEG_SUBNORM = 1,
11486 DC_POS_SUBNORM = 1 << 1,
11487 };
11488
11489 EVT VT = Op.getValueType();
11490
11491 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11492 : VT == MVT::f64 ? PPC::XSTSTDCDP
11493 : PPC::XSTSTDCSP;
11494
11495 if (Mask == fcAllFlags)
11496 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11497 if (Mask == 0)
11498 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11499
11500 // Handle the cases where it is cheaper, or necessary, to test the inverted set of flags.
11501 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11502 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11503 return DAG.getNOT(Dl, Rev, MVT::i1);
11504 }
11505
11506 // Power doesn't support testing whether a value is 'normal'. Test the rest
11507 // first, and test if it's 'not not-normal' with expected sign.
11508 if (Mask & fcNormal) {
11509 SDValue Rev(DAG.getMachineNode(
11510 TestOp, Dl, MVT::i32,
11511 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11512 DC_NEG_ZERO | DC_POS_ZERO |
11513 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11514 Dl, MVT::i32),
11515 Op),
11516 0);
11517 // The sign is stored in CR bit 0, the result in CR bit 2.
11518 SDValue Sign(
11519 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11520 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11521 0);
11522 SDValue Normal(DAG.getNOT(
11523 Dl,
11524 SDValue(DAG.getMachineNode(
11525 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11526 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11527 0),
11528 MVT::i1));
11529 if (Mask & fcPosNormal)
11530 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11531 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11532 if (Mask == fcPosNormal || Mask == fcNegNormal)
11533 return Result;
11534
11535 return DAG.getNode(
11536 ISD::OR, Dl, MVT::i1,
11537 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11538 }
11539
11540 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11541 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11542 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11543 bool IsQuiet = Mask & fcQNan;
11544 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11545
11546 // Quietness is determined by the first bit in fraction field.
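// (bit 111 of an f128, bit 51 of an f64, bit 22 of an f32); the masks below
// select that bit within the 32-bit word holding the top of the fraction.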
11547 uint64_t QuietMask = 0;
11548 SDValue HighWord;
11549 if (VT == MVT::f128) {
11550 HighWord = DAG.getNode(
11551 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11552 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11553 QuietMask = 0x8000;
11554 } else if (VT == MVT::f64) {
11555 if (Subtarget.isPPC64()) {
11556 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11557 DAG.getBitcast(MVT::i64, Op),
11558 DAG.getConstant(1, Dl, MVT::i32));
11559 } else {
11560 SDValue Vec = DAG.getBitcast(
11561 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11562 HighWord = DAG.getNode(
11563 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11564 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11565 }
11566 QuietMask = 0x80000;
11567 } else if (VT == MVT::f32) {
11568 HighWord = DAG.getBitcast(MVT::i32, Op);
11569 QuietMask = 0x400000;
11570 }
11571 SDValue NanRes = DAG.getSetCC(
11572 Dl, MVT::i1,
11573 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11574 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11575 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11576 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11577 if (Mask == fcQNan || Mask == fcSNan)
11578 return NanRes;
11579
11580 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11581 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11582 NanRes);
11583 }
11584
11585 unsigned NativeMask = 0;
11586 if ((Mask & fcNan) == fcNan)
11587 NativeMask |= DC_NAN;
11588 if (Mask & fcNegInf)
11589 NativeMask |= DC_NEG_INF;
11590 if (Mask & fcPosInf)
11591 NativeMask |= DC_POS_INF;
11592 if (Mask & fcNegZero)
11593 NativeMask |= DC_NEG_ZERO;
11594 if (Mask & fcPosZero)
11595 NativeMask |= DC_POS_ZERO;
11596 if (Mask & fcNegSubnormal)
11597 NativeMask |= DC_NEG_SUBNORM;
11598 if (Mask & fcPosSubnormal)
11599 NativeMask |= DC_POS_SUBNORM;
11600 return SDValue(
11601 DAG.getMachineNode(
11602 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11603 SDValue(DAG.getMachineNode(
11604 TestOp, Dl, MVT::i32,
11605 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11606 0),
11607 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11608 0);
11609}
11610
11611SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11612 SelectionDAG &DAG) const {
11613 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11614 SDValue LHS = Op.getOperand(0);
11615 uint64_t RHSC = Op.getConstantOperandVal(1);
11616 SDLoc Dl(Op);
11617 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11618 if (LHS.getValueType() == MVT::ppcf128) {
11619 // The higher part determines the value class.
11620 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11621 DAG.getConstant(1, Dl, MVT::i32));
11622 }
11623
11624 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11625}
11626
11627SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11628 SelectionDAG &DAG) const {
11629 SDLoc dl(Op);
11630
11631 MachineFunction &MF = DAG.getMachineFunction();
11632 SDValue Op0 = Op.getOperand(0);
11633 EVT ValVT = Op0.getValueType();
11634 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11635 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11636 int64_t IntVal = Op.getConstantOperandVal(0);
11637 if (IntVal >= -16 && IntVal <= 15)
11638 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11639 dl);
11640 }
11641
11642 ReuseLoadInfo RLI;
11643 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11644 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11645 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11646 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11647
11648 MachineMemOperand *MMO =
11649 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11650 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11651 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11652 SDValue Bits = DAG.getMemIntrinsicNode(
11653 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11654 MVT::i32, MMO);
11655 if (RLI.ResChain)
11656 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11657 return Bits.getValue(0);
11658 }
11659
11660 // Create a stack slot that is 16-byte aligned.
11661 MachineFrameInfo &MFI = MF.getFrameInfo();
11662 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11663 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11664 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11665
11666 SDValue Val = Op0;
11667 // P10 hardware store forwarding requires that a single store contains all
11668 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11669 // to avoid load hit store on P10 when running binaries compiled for older
11670 // processors by generating two mergeable scalar stores to forward with the
11671 // vector load.
11672 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11673 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11674 ValVT.getSizeInBits() <= 64) {
11675 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11676 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11677 SDValue ShiftBy = DAG.getConstant(
11678 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11679 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11680 SDValue Plus8 =
11681 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11682 SDValue Store2 =
11683 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11684 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11685 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11686 MachinePointerInfo());
11687 }
11688
11689 // Store the input value into Value#0 of the stack slot.
11690 SDValue Store =
11691 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11692 // Load it out.
11693 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11694}
11695
11696SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11697 SelectionDAG &DAG) const {
11698 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11699 "Should only be called for ISD::INSERT_VECTOR_ELT");
11700
11701 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11702
11703 EVT VT = Op.getValueType();
11704 SDLoc dl(Op);
11705 SDValue V1 = Op.getOperand(0);
11706 SDValue V2 = Op.getOperand(1);
11707
11708 if (VT == MVT::v2f64 && C)
11709 return Op;
11710
11711 if (Subtarget.hasP9Vector()) {
11712 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11713 // because on P10, it allows this specific insert_vector_elt load pattern to
11714 // utilize the refactored load and store infrastructure in order to exploit
11715 // prefixed loads.
11716 // On targets with inexpensive direct moves (Power9 and up), a
11717 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11718 // load since a single precision load will involve conversion to double
11719 // precision on the load followed by another conversion to single precision.
11720 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11721 (isa<LoadSDNode>(V2))) {
11722 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11723 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11724 SDValue InsVecElt =
11725 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11726 BitcastLoad, Op.getOperand(2));
11727 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11728 }
11729 }
11730
11731 if (Subtarget.isISA3_1()) {
11732 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11733 return SDValue();
11734 // On P10, we have legal lowering for constant and variable indices for
11735 // all vectors.
11736 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11737 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11738 return Op;
11739 }
11740
11741 // Before P10, we have legal lowering for constant indices but not for
11742 // variable ones.
11743 if (!C)
11744 return SDValue();
11745
11746 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11747 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11748 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11749 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11750 unsigned InsertAtElement = C->getZExtValue();
11751 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11752 if (Subtarget.isLittleEndian()) {
11753 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11754 }
11755 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11756 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11757 }
11758 return Op;
11759}
11760
11761SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11762 SelectionDAG &DAG) const {
11763 SDLoc dl(Op);
11764 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11765 SDValue LoadChain = LN->getChain();
11766 SDValue BasePtr = LN->getBasePtr();
11767 EVT VT = Op.getValueType();
11768
11769 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11770 return Op;
11771
11772 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11773 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11774 // 2 or 4 vsx registers.
11775 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11776 "Type unsupported without MMA");
11777 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11778 "Type unsupported without paired vector support");
11779 Align Alignment = LN->getAlign();
11780 SmallVector<SDValue, 4> Loads;
11781 SmallVector<SDValue, 4> LoadChains;
11782 unsigned NumVecs = VT.getSizeInBits() / 128;
11783 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11784 SDValue Load =
11785 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11786 LN->getPointerInfo().getWithOffset(Idx * 16),
11787 commonAlignment(Alignment, Idx * 16),
11788 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11789 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11790 DAG.getConstant(16, dl, BasePtr.getValueType()));
11791 Loads.push_back(Load);
11792 LoadChains.push_back(Load.getValue(1));
11793 }
11794 if (Subtarget.isLittleEndian()) {
11795 std::reverse(Loads.begin(), Loads.end());
11796 std::reverse(LoadChains.begin(), LoadChains.end());
11797 }
11798 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11799 SDValue Value =
11800 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11801 dl, VT, Loads);
11802 SDValue RetOps[] = {Value, TF};
11803 return DAG.getMergeValues(RetOps, dl);
11804}
11805
11806SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11807 SelectionDAG &DAG) const {
11808 SDLoc dl(Op);
11809 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11810 SDValue StoreChain = SN->getChain();
11811 SDValue BasePtr = SN->getBasePtr();
11812 SDValue Value = SN->getValue();
11813 SDValue Value2 = SN->getValue();
11814 EVT StoreVT = Value.getValueType();
11815
11816 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11817 return Op;
11818
11819 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11820 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11821 // underlying registers individually.
11822 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11823 "Type unsupported without MMA");
11824 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11825 "Type unsupported without paired vector support");
11826 Align Alignment = SN->getAlign();
11827 SmallVector<SDValue, 4> Stores;
11828 unsigned NumVecs = 2;
11829 if (StoreVT == MVT::v512i1) {
11830 if (Subtarget.isISAFuture()) {
11831 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11832 MachineSDNode *ExtNode = DAG.getMachineNode(
11833 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11834
11835 Value = SDValue(ExtNode, 0);
11836 Value2 = SDValue(ExtNode, 1);
11837 } else
11838 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11839 NumVecs = 4;
11840 }
11841 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11842 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11843 SDValue Elt;
11844 if (Subtarget.isISAFuture()) {
11845 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11846 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11847 Idx > 1 ? Value2 : Value,
11848 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11849 } else
11850 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11851 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11852
11853 SDValue Store =
11854 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11855 SN->getPointerInfo().getWithOffset(Idx * 16),
11856 commonAlignment(Alignment, Idx * 16),
11857 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11858 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11859 DAG.getConstant(16, dl, BasePtr.getValueType()));
11860 Stores.push_back(Store);
11861 }
11862 SDValue TF = DAG.getTokenFactor(dl, Stores);
11863 return TF;
11864}
11865
11866SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11867 SDLoc dl(Op);
11868 if (Op.getValueType() == MVT::v4i32) {
11869 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11870
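// Decompose each 32-bit product into halfword multiplies:
//   lo(L)*lo(R) + ((hi(L)*lo(R) + lo(L)*hi(R)) << 16)   (mod 2^32)
// vmulouh yields the first term, and vmsumuhm on LHS with the halfword-swapped
// RHS yields the parenthesized sum, which is then shifted up by 16 and added.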
11871 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11872 // +16 as shift amt.
11873 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11874 SDValue RHSSwap = // = vrlw RHS, 16
11875 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11876
11877 // Shrinkify inputs to v8i16.
11878 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11879 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11880 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11881
11882 // Low parts multiplied together, generating 32-bit results (we ignore the
11883 // top parts).
11884 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11885 LHS, RHS, DAG, dl, MVT::v4i32);
11886
11887 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11888 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11889 // Shift the high parts up 16 bits.
11890 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11891 Neg16, DAG, dl);
11892 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11893 } else if (Op.getValueType() == MVT::v16i8) {
11894 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11895 bool isLittleEndian = Subtarget.isLittleEndian();
11896
11897 // Multiply the even 8-bit parts, producing 16-bit sums.
11898 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11899 LHS, RHS, DAG, dl, MVT::v8i16);
11900 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11901
11902 // Multiply the odd 8-bit parts, producing 16-bit sums.
11903 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11904 LHS, RHS, DAG, dl, MVT::v8i16);
11905 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11906
11907 // Merge the results together. Because vmuleub and vmuloub are
11908 // instructions with a big-endian bias, we must reverse the
11909 // element numbering and reverse the meaning of "odd" and "even"
11910 // when generating little endian code.
11911 int Ops[16];
11912 for (unsigned i = 0; i != 8; ++i) {
11913 if (isLittleEndian) {
11914 Ops[i*2 ] = 2*i;
11915 Ops[i*2+1] = 2*i+16;
11916 } else {
11917 Ops[i*2 ] = 2*i+1;
11918 Ops[i*2+1] = 2*i+1+16;
11919 }
11920 }
11921 if (isLittleEndian)
11922 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11923 else
11924 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11925 } else {
11926 llvm_unreachable("Unknown mul to lower!");
11927 }
11928}
11929
11930SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11931 bool IsStrict = Op->isStrictFPOpcode();
11932 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11933 !Subtarget.hasP9Vector())
11934 return SDValue();
11935
11936 return Op;
11937}
11938
11939 // Custom lowering for fpext v2f32 to v2f64
11940SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11941
11942 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11943 "Should only be called for ISD::FP_EXTEND");
11944
11945 // FIXME: handle extends from half precision float vectors on P9.
11946 // We only want to custom lower an extend from v2f32 to v2f64.
11947 if (Op.getValueType() != MVT::v2f64 ||
11948 Op.getOperand(0).getValueType() != MVT::v2f32)
11949 return SDValue();
11950
11951 SDLoc dl(Op);
11952 SDValue Op0 = Op.getOperand(0);
11953
11954 switch (Op0.getOpcode()) {
11955 default:
11956 return SDValue();
11957 case ISD::EXTRACT_SUBVECTOR: {
11958 assert(Op0.getNumOperands() == 2 &&
11959 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11960 "Node should have 2 operands with second one being a constant!");
11961
11962 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11963 return SDValue();
11964
11965 // Custom lowering is only done for the high or low doubleword.
11966 int Idx = Op0.getConstantOperandVal(1);
11967 if (Idx % 2 != 0)
11968 return SDValue();
11969
11970 // Since input is v4f32, at this point Idx is either 0 or 2.
11971 // Shift to get the doubleword position we want.
11972 int DWord = Idx >> 1;
11973
11974 // High and low word positions are different on little endian.
11975 if (Subtarget.isLittleEndian())
11976 DWord ^= 0x1;
11977
11978 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11979 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11980 }
11981 case ISD::FADD:
11982 case ISD::FMUL:
11983 case ISD::FSUB: {
11984 SDValue NewLoad[2];
11985 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11986 // Ensure both inputs are loads.
11987 SDValue LdOp = Op0.getOperand(i);
11988 if (LdOp.getOpcode() != ISD::LOAD)
11989 return SDValue();
11990 // Generate new load node.
11991 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11992 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11993 NewLoad[i] = DAG.getMemIntrinsicNode(
11994 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11995 LD->getMemoryVT(), LD->getMemOperand());
11996 }
11997 SDValue NewOp =
11998 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11999 NewLoad[1], Op0.getNode()->getFlags());
12000 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12001 DAG.getConstant(0, dl, MVT::i32));
12002 }
12003 case ISD::LOAD: {
12004 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12005 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12006 SDValue NewLd = DAG.getMemIntrinsicNode(
12007 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12008 LD->getMemoryVT(), LD->getMemOperand());
12009 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12010 DAG.getConstant(0, dl, MVT::i32));
12011 }
12012 }
12013 llvm_unreachable("ERROR:Should return for all cases within swtich.");
12014}
12015
12016SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
12017 // Default to target independent lowering if there is a logical user of the
12018 // carry-bit.
12019 for (SDNode *U : Op->users()) {
12020 if (U->getOpcode() == ISD::SELECT)
12021 return SDValue();
12022 if (ISD::isBitwiseLogicOp(U->getOpcode())) {
12023 for (unsigned i = 0, ie = U->getNumOperands(); i != ie; ++i) {
12024 if (U->getOperand(i).getOpcode() != ISD::UADDO &&
12025 U->getOperand(i).getOpcode() != ISD::MERGE_VALUES)
12026 return SDValue();
12027 }
12028 }
12029 }
12030 SDValue LHS = Op.getOperand(0);
12031 SDValue RHS = Op.getOperand(1);
12032 SDLoc dl(Op);
12033
12034 // Default to target independent lowering for special cases handled there.
12035 if (isOneConstant(RHS) || isAllOnesConstant(RHS))
12036 return SDValue();
12037
12038 EVT VT = Op.getNode()->getValueType(0);
12039
12040 SDValue ADDC;
12041 SDValue Overflow;
12042 SDVTList VTs = Op.getNode()->getVTList();
12043
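// Compute the sum with ADDC, then add 0 + 0 + carry with ADDE; the low result
// of that ADDE is exactly the carry out of the addition, i.e. the overflow bit.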
12044 ADDC = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), LHS, RHS);
12045 Overflow = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(VT, MVT::Glue),
12046 DAG.getConstant(0, dl, VT), DAG.getConstant(0, dl, VT),
12047 ADDC.getValue(1));
12048 SDValue OverflowTrunc =
12049 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12050 SDValue Res =
12051 DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
12052 return Res;
12053}
12054
12055SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12056
12057 SDLoc dl(Op);
12058 SDValue LHS = Op.getOperand(0);
12059 SDValue RHS = Op.getOperand(1);
12060 EVT VT = Op.getNode()->getValueType(0);
12061
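// Signed overflow of LHS - RHS occurs exactly when the operands have different
// signs and the result's sign differs from LHS, so the sign bit of
// (RHS ^ LHS) & (Sub ^ LHS) is the overflow flag; it is shifted down below.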
12062 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12063
12064 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12065 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12066
12067 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12068
12069 SDValue Overflow =
12070 DAG.getNode(ISD::SRL, dl, VT, And,
12071 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12072
12073 SDValue OverflowTrunc =
12074 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12075
12076 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12077}
12078
12079/// LowerOperation - Provide custom lowering hooks for some operations.
12080///
12082 switch (Op.getOpcode()) {
12083 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
12084 case ISD::UADDO: return LowerUaddo(Op, DAG);
12085 case ISD::FPOW: return lowerPow(Op, DAG);
12086 case ISD::FSIN: return lowerSin(Op, DAG);
12087 case ISD::FCOS: return lowerCos(Op, DAG);
12088 case ISD::FLOG: return lowerLog(Op, DAG);
12089 case ISD::FLOG10: return lowerLog10(Op, DAG);
12090 case ISD::FEXP: return lowerExp(Op, DAG);
12091 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12092 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12093 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12094 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12095 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12096 case ISD::STRICT_FSETCC:
12097 case ISD::STRICT_FSETCCS:
12098 case ISD::SETCC: return LowerSETCC(Op, DAG);
12099 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12100 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12101 case ISD::SSUBO:
12102 return LowerSSUBO(Op, DAG);
12103
12104 case ISD::INLINEASM:
12105 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12106 // Variable argument lowering.
12107 case ISD::VASTART: return LowerVASTART(Op, DAG);
12108 case ISD::VAARG: return LowerVAARG(Op, DAG);
12109 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12110
12111 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12112 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12113 case ISD::GET_DYNAMIC_AREA_OFFSET:
12114 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12115
12116 // Exception handling lowering.
12117 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12118 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12119 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12120
12121 case ISD::LOAD: return LowerLOAD(Op, DAG);
12122 case ISD::STORE: return LowerSTORE(Op, DAG);
12123 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12124 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12125 case ISD::STRICT_FP_TO_UINT:
12126 case ISD::STRICT_FP_TO_SINT:
12127 case ISD::FP_TO_UINT:
12128 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12129 case ISD::STRICT_UINT_TO_FP:
12130 case ISD::STRICT_SINT_TO_FP:
12131 case ISD::UINT_TO_FP:
12132 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12133 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12134 case ISD::SET_ROUNDING:
12135 return LowerSET_ROUNDING(Op, DAG);
12136
12137 // Lower 64-bit shifts.
12138 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12139 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12140 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12141
12142 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12143 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12144
12145 // Vector-related lowering.
12146 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12147 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12148 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12149 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12150 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12151 case ISD::MUL: return LowerMUL(Op, DAG);
12152 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12153 case ISD::STRICT_FP_ROUND:
12154 case ISD::FP_ROUND:
12155 return LowerFP_ROUND(Op, DAG);
12156 case ISD::ROTL: return LowerROTL(Op, DAG);
12157
12158 // For counter-based loop handling.
12159 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12160
12161 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12162
12163 // Frame & Return address.
12164 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12165 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12166
12168 return LowerINTRINSIC_VOID(Op, DAG);
12169 case ISD::BSWAP:
12170 return LowerBSWAP(Op, DAG);
12171 case ISD::ATOMIC_CMP_SWAP:
12172 return LowerATOMIC_CMP_SWAP(Op, DAG);
12173 case ISD::ATOMIC_STORE:
12174 return LowerATOMIC_LOAD_STORE(Op, DAG);
12175 case ISD::IS_FPCLASS:
12176 return LowerIS_FPCLASS(Op, DAG);
12177 }
12178}
12179
12180 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12181 SmallVectorImpl<SDValue> &Results,
12182 SelectionDAG &DAG) const {
12183 SDLoc dl(N);
12184 switch (N->getOpcode()) {
12185 default:
12186 llvm_unreachable("Do not know how to custom type legalize this operation!");
12187 case ISD::ATOMIC_LOAD: {
12188 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12189 Results.push_back(Res);
12190 Results.push_back(Res.getValue(1));
12191 break;
12192 }
12193 case ISD::READCYCLECOUNTER: {
12194 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12195 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12196
12197 Results.push_back(
12198 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12199 Results.push_back(RTB.getValue(2));
12200 break;
12201 }
12202 case ISD::INTRINSIC_W_CHAIN: {
12203 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12204 break;
12205
12206 assert(N->getValueType(0) == MVT::i1 &&
12207 "Unexpected result type for CTR decrement intrinsic");
12208 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12209 N->getValueType(0));
12210 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12211 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12212 N->getOperand(1));
12213
12214 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12215 Results.push_back(NewInt.getValue(1));
12216 break;
12217 }
12218 case ISD::INTRINSIC_WO_CHAIN: {
12219 switch (N->getConstantOperandVal(0)) {
12220 case Intrinsic::ppc_pack_longdouble:
12221 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12222 N->getOperand(2), N->getOperand(1)));
12223 break;
12224 case Intrinsic::ppc_maxfe:
12225 case Intrinsic::ppc_minfe:
12226 case Intrinsic::ppc_fnmsub:
12227 case Intrinsic::ppc_convert_f128_to_ppcf128:
12228 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12229 break;
12230 }
12231 break;
12232 }
12233 case ISD::VAARG: {
12234 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12235 return;
12236
12237 EVT VT = N->getValueType(0);
12238
12239 if (VT == MVT::i64) {
12240 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12241
12242 Results.push_back(NewNode);
12243 Results.push_back(NewNode.getValue(1));
12244 }
12245 return;
12246 }
12247 case ISD::STRICT_FP_TO_SINT:
12248 case ISD::STRICT_FP_TO_UINT:
12249 case ISD::FP_TO_SINT:
12250 case ISD::FP_TO_UINT: {
12251 // LowerFP_TO_INT() can only handle f32 and f64.
12252 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12253 MVT::ppcf128)
12254 return;
12255 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12256 Results.push_back(LoweredValue);
12257 if (N->isStrictFPOpcode())
12258 Results.push_back(LoweredValue.getValue(1));
12259 return;
12260 }
12261 case ISD::TRUNCATE: {
12262 if (!N->getValueType(0).isVector())
12263 return;
12264 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12265 if (Lowered)
12266 Results.push_back(Lowered);
12267 return;
12268 }
12269 case ISD::SCALAR_TO_VECTOR: {
12270 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12271 if (Lowered)
12272 Results.push_back(Lowered);
12273 return;
12274 }
12275 case ISD::FSHL:
12276 case ISD::FSHR:
12277 // Don't handle funnel shifts here.
12278 return;
12279 case ISD::BITCAST:
12280 // Don't handle bitcast here.
12281 return;
12282 case ISD::FP_EXTEND:
12283 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12284 if (Lowered)
12285 Results.push_back(Lowered);
12286 return;
12287 }
12288}
12289
12290//===----------------------------------------------------------------------===//
12291// Other Lowering Code
12292//===----------------------------------------------------------------------===//
12293
12294 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12295 return Builder.CreateIntrinsic(Id, {}, {});
12296}
12297
12298 // The mappings for emitLeading/TrailingFence are taken from
12299 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12300 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12301 Instruction *Inst,
12302 AtomicOrdering Ord) const {
12303 if (Ord == AtomicOrdering::SequentiallyConsistent)
12304 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12305 if (isReleaseOrStronger(Ord))
12306 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12307 return nullptr;
12308}
12309
12310 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12311 Instruction *Inst,
12312 AtomicOrdering Ord) const {
12313 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12314 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12315 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12316 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12317 if (isa<LoadInst>(Inst))
12318 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12319 {Inst});
12320 // FIXME: Can use isync for rmw operation.
12321 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12322 }
12323 return nullptr;
12324}
12325
12326 MachineBasicBlock *
12327 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12328 unsigned AtomicSize,
12329 unsigned BinOpcode,
12330 unsigned CmpOpcode,
12331 unsigned CmpPred) const {
12332 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12333 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12334
12335 auto LoadMnemonic = PPC::LDARX;
12336 auto StoreMnemonic = PPC::STDCX;
12337 switch (AtomicSize) {
12338 default:
12339 llvm_unreachable("Unexpected size of atomic entity");
12340 case 1:
12341 LoadMnemonic = PPC::LBARX;
12342 StoreMnemonic = PPC::STBCX;
12343 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12344 break;
12345 case 2:
12346 LoadMnemonic = PPC::LHARX;
12347 StoreMnemonic = PPC::STHCX;
12348 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12349 break;
12350 case 4:
12351 LoadMnemonic = PPC::LWARX;
12352 StoreMnemonic = PPC::STWCX;
12353 break;
12354 case 8:
12355 LoadMnemonic = PPC::LDARX;
12356 StoreMnemonic = PPC::STDCX;
12357 break;
12358 }
12359
12360 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12361 MachineFunction *F = BB->getParent();
12362 MachineFunction::iterator It = ++BB->getIterator();
12363
12364 Register dest = MI.getOperand(0).getReg();
12365 Register ptrA = MI.getOperand(1).getReg();
12366 Register ptrB = MI.getOperand(2).getReg();
12367 Register incr = MI.getOperand(3).getReg();
12368 DebugLoc dl = MI.getDebugLoc();
12369
12370 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12371 MachineBasicBlock *loop2MBB =
12372 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12373 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12374 F->insert(It, loopMBB);
12375 if (CmpOpcode)
12376 F->insert(It, loop2MBB);
12377 F->insert(It, exitMBB);
12378 exitMBB->splice(exitMBB->begin(), BB,
12379 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12380 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12381
12382 MachineRegisterInfo &RegInfo = F->getRegInfo();
12383 Register TmpReg = (!BinOpcode) ? incr :
12384 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12385 : &PPC::GPRCRegClass);
12386
12387 // thisMBB:
12388 // ...
12389 // fallthrough --> loopMBB
12390 BB->addSuccessor(loopMBB);
12391
12392 // loopMBB:
12393 // l[wd]arx dest, ptr
12394 // add r0, dest, incr
12395 // st[wd]cx. r0, ptr
12396 // bne- loopMBB
12397 // fallthrough --> exitMBB
12398
12399 // For max/min...
12400 // loopMBB:
12401 // l[wd]arx dest, ptr
12402 // cmpl?[wd] dest, incr
12403 // bgt exitMBB
12404 // loop2MBB:
12405 // st[wd]cx. dest, ptr
12406 // bne- loopMBB
12407 // fallthrough --> exitMBB
12408
12409 BB = loopMBB;
12410 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12411 .addReg(ptrA).addReg(ptrB);
12412 if (BinOpcode)
12413 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12414 if (CmpOpcode) {
12415 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12416 // Signed comparisons of byte or halfword values must be sign-extended.
12417 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12418 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12419 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12420 ExtReg).addReg(dest);
12421 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12422 } else
12423 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12424
12425 BuildMI(BB, dl, TII->get(PPC::BCC))
12426 .addImm(CmpPred)
12427 .addReg(CrReg)
12428 .addMBB(exitMBB);
12429 BB->addSuccessor(loop2MBB);
12430 BB->addSuccessor(exitMBB);
12431 BB = loop2MBB;
12432 }
12433 BuildMI(BB, dl, TII->get(StoreMnemonic))
12434 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12435 BuildMI(BB, dl, TII->get(PPC::BCC))
12436 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12437 BB->addSuccessor(loopMBB);
12438 BB->addSuccessor(exitMBB);
12439
12440 // exitMBB:
12441 // ...
12442 BB = exitMBB;
12443 return BB;
12444}
12445
12446 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12447 switch(MI.getOpcode()) {
12448 default:
12449 return false;
12450 case PPC::COPY:
12451 return TII->isSignExtended(MI.getOperand(1).getReg(),
12452 &MI.getMF()->getRegInfo());
12453 case PPC::LHA:
12454 case PPC::LHA8:
12455 case PPC::LHAU:
12456 case PPC::LHAU8:
12457 case PPC::LHAUX:
12458 case PPC::LHAUX8:
12459 case PPC::LHAX:
12460 case PPC::LHAX8:
12461 case PPC::LWA:
12462 case PPC::LWAUX:
12463 case PPC::LWAX:
12464 case PPC::LWAX_32:
12465 case PPC::LWA_32:
12466 case PPC::PLHA:
12467 case PPC::PLHA8:
12468 case PPC::PLHA8pc:
12469 case PPC::PLHApc:
12470 case PPC::PLWA:
12471 case PPC::PLWA8:
12472 case PPC::PLWA8pc:
12473 case PPC::PLWApc:
12474 case PPC::EXTSB:
12475 case PPC::EXTSB8:
12476 case PPC::EXTSB8_32_64:
12477 case PPC::EXTSB8_rec:
12478 case PPC::EXTSB_rec:
12479 case PPC::EXTSH:
12480 case PPC::EXTSH8:
12481 case PPC::EXTSH8_32_64:
12482 case PPC::EXTSH8_rec:
12483 case PPC::EXTSH_rec:
12484 case PPC::EXTSW:
12485 case PPC::EXTSWSLI:
12486 case PPC::EXTSWSLI_32_64:
12487 case PPC::EXTSWSLI_32_64_rec:
12488 case PPC::EXTSWSLI_rec:
12489 case PPC::EXTSW_32:
12490 case PPC::EXTSW_32_64:
12491 case PPC::EXTSW_32_64_rec:
12492 case PPC::EXTSW_rec:
12493 case PPC::SRAW:
12494 case PPC::SRAWI:
12495 case PPC::SRAWI_rec:
12496 case PPC::SRAW_rec:
12497 return true;
12498 }
12499 return false;
12500}
12501
12502 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12503 MachineInstr &MI, MachineBasicBlock *BB,
12504 bool is8bit, // operation
12505 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12506 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12507 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12508
12509 // If this is a signed comparison and the value being compared is not known
12510 // to be sign extended, sign extend it here.
12511 DebugLoc dl = MI.getDebugLoc();
12512 MachineFunction *F = BB->getParent();
12513 MachineRegisterInfo &RegInfo = F->getRegInfo();
12514 Register incr = MI.getOperand(3).getReg();
12515 bool IsSignExtended =
12516 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12517
12518 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12519 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12520 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12521 .addReg(MI.getOperand(3).getReg());
12522 MI.getOperand(3).setReg(ValueReg);
12523 incr = ValueReg;
12524 }
12525 // If we support part-word atomic mnemonics, just use them
12526 if (Subtarget.hasPartwordAtomics())
12527 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12528 CmpPred);
12529
12530 // In 64 bit mode we have to use 64 bits for addresses, even though the
12531 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12532 // registers without caring whether they're 32 or 64, but here we're
12533 // doing actual arithmetic on the addresses.
12534 bool is64bit = Subtarget.isPPC64();
12535 bool isLittleEndian = Subtarget.isLittleEndian();
12536 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12537
12538 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12539 MachineFunction::iterator It = ++BB->getIterator();
12540
12541 Register dest = MI.getOperand(0).getReg();
12542 Register ptrA = MI.getOperand(1).getReg();
12543 Register ptrB = MI.getOperand(2).getReg();
12544
12545 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12546 MachineBasicBlock *loop2MBB =
12547 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12548 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12549 F->insert(It, loopMBB);
12550 if (CmpOpcode)
12551 F->insert(It, loop2MBB);
12552 F->insert(It, exitMBB);
12553 exitMBB->splice(exitMBB->begin(), BB,
12554 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12555 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12556
12557 const TargetRegisterClass *RC =
12558 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12559 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12560
12561 Register PtrReg = RegInfo.createVirtualRegister(RC);
12562 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12563 Register ShiftReg =
12564 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12565 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12566 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12567 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12568 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12569 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12570 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12571 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12572 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12573 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12574 Register Ptr1Reg;
12575 Register TmpReg =
12576 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12577
12578 // thisMBB:
12579 // ...
12580 // fallthrough --> loopMBB
12581 BB->addSuccessor(loopMBB);
12582
12583 // The 4-byte load must be aligned, while a char or short may be
12584 // anywhere in the word. Hence all this nasty bookkeeping code.
12585 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12586 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12587 // xori shift, shift1, 24 [16]
12588 // rlwinm ptr, ptr1, 0, 0, 29
12589 // slw incr2, incr, shift
12590 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12591 // slw mask, mask2, shift
12592 // loopMBB:
12593 // lwarx tmpDest, ptr
12594 // add tmp, tmpDest, incr2
12595 // andc tmp2, tmpDest, mask
12596 // and tmp3, tmp, mask
12597 // or tmp4, tmp3, tmp2
12598 // stwcx. tmp4, ptr
12599 // bne- loopMBB
12600 // fallthrough --> exitMBB
12601 // srw SrwDest, tmpDest, shift
12602 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
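// (In the pseudo-code above, the bracketed operand values are the halfword
// (16-bit) variants; the unbracketed values are used for the byte (8-bit) case.)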
12603 if (ptrA != ZeroReg) {
12604 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12605 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12606 .addReg(ptrA)
12607 .addReg(ptrB);
12608 } else {
12609 Ptr1Reg = ptrB;
12610 }
12611 // We need to use a 32-bit subregister to avoid a register class mismatch in
12612 // 64-bit mode.
12613 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12614 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12615 .addImm(3)
12616 .addImm(27)
12617 .addImm(is8bit ? 28 : 27);
12618 if (!isLittleEndian)
12619 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12620 .addReg(Shift1Reg)
12621 .addImm(is8bit ? 24 : 16);
12622 if (is64bit)
12623 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12624 .addReg(Ptr1Reg)
12625 .addImm(0)
12626 .addImm(61);
12627 else
12628 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12629 .addReg(Ptr1Reg)
12630 .addImm(0)
12631 .addImm(0)
12632 .addImm(29);
12633 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12634 if (is8bit)
12635 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12636 else {
12637 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12638 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12639 .addReg(Mask3Reg)
12640 .addImm(65535);
12641 }
12642 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12643 .addReg(Mask2Reg)
12644 .addReg(ShiftReg);
12645
12646 BB = loopMBB;
12647 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12648 .addReg(ZeroReg)
12649 .addReg(PtrReg);
12650 if (BinOpcode)
12651 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12652 .addReg(Incr2Reg)
12653 .addReg(TmpDestReg);
12654 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12655 .addReg(TmpDestReg)
12656 .addReg(MaskReg);
12657 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12658 if (CmpOpcode) {
12659 // For unsigned comparisons, we can directly compare the shifted values.
12660 // For signed comparisons we shift and sign extend.
12661 Register SReg = RegInfo.createVirtualRegister(GPRC);
12662 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12663 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12664 .addReg(TmpDestReg)
12665 .addReg(MaskReg);
12666 unsigned ValueReg = SReg;
12667 unsigned CmpReg = Incr2Reg;
12668 if (CmpOpcode == PPC::CMPW) {
12669 ValueReg = RegInfo.createVirtualRegister(GPRC);
12670 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12671 .addReg(SReg)
12672 .addReg(ShiftReg);
12673 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12674 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12675 .addReg(ValueReg);
12676 ValueReg = ValueSReg;
12677 CmpReg = incr;
12678 }
12679 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12680 BuildMI(BB, dl, TII->get(PPC::BCC))
12681 .addImm(CmpPred)
12682 .addReg(CrReg)
12683 .addMBB(exitMBB);
12684 BB->addSuccessor(loop2MBB);
12685 BB->addSuccessor(exitMBB);
12686 BB = loop2MBB;
12687 }
12688 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12689 BuildMI(BB, dl, TII->get(PPC::STWCX))
12690 .addReg(Tmp4Reg)
12691 .addReg(ZeroReg)
12692 .addReg(PtrReg);
12693 BuildMI(BB, dl, TII->get(PPC::BCC))
12694 .addImm(PPC::PRED_NE)
12695 .addReg(PPC::CR0)
12696 .addMBB(loopMBB);
12697 BB->addSuccessor(loopMBB);
12698 BB->addSuccessor(exitMBB);
12699
12700 // exitMBB:
12701 // ...
12702 BB = exitMBB;
12703 // Since the shift amount is not a constant, we need to clear
12704 // the upper bits with a separate RLWINM.
12705 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12706 .addReg(SrwDestReg)
12707 .addImm(0)
12708 .addImm(is8bit ? 24 : 16)
12709 .addImm(31);
12710 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12711 .addReg(TmpDestReg)
12712 .addReg(ShiftReg);
12713 return BB;
12714}
12715
12716MachineBasicBlock *
12717PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12718 MachineBasicBlock *MBB) const {
12719 DebugLoc DL = MI.getDebugLoc();
12720 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12721 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12722
12723 MachineFunction *MF = MBB->getParent();
12724 MachineRegisterInfo &MRI = MF->getRegInfo();
12725
12726 const BasicBlock *BB = MBB->getBasicBlock();
12727 MachineFunction::iterator I = ++MBB->getIterator();
12728
12729 Register DstReg = MI.getOperand(0).getReg();
12730 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12731 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12732 Register mainDstReg = MRI.createVirtualRegister(RC);
12733 Register restoreDstReg = MRI.createVirtualRegister(RC);
12734
12735 MVT PVT = getPointerTy(MF->getDataLayout());
12736 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12737 "Invalid Pointer Size!");
12738 // For v = setjmp(buf), we generate
12739 //
12740 // thisMBB:
12741 // SjLjSetup mainMBB
12742 // bl mainMBB
12743 // v_restore = 1
12744 // b sinkMBB
12745 //
12746 // mainMBB:
12747 // buf[LabelOffset] = LR
12748 // v_main = 0
12749 //
12750 // sinkMBB:
12751 // v = phi(main, restore)
12752 //
12753
12754 MachineBasicBlock *thisMBB = MBB;
12755 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12756 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12757 MF->insert(I, mainMBB);
12758 MF->insert(I, sinkMBB);
12759
12760 MachineInstrBuilder MIB;
12761
12762 // Transfer the remainder of BB and its successor edges to sinkMBB.
12763 sinkMBB->splice(sinkMBB->begin(), MBB,
12764 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12765 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12766
12767 // Note that the structure of the jmp_buf used here is not compatible
12768 // with that used by libc, and is not designed to be. Specifically, it
12769 // stores only those 'reserved' registers that LLVM does not otherwise
12770 // understand how to spill. Also, by convention, by the time this
12771 // intrinsic is called, Clang has already stored the frame address in the
12772 // first slot of the buffer and stack address in the third. Following the
12773 // X86 target code, we'll store the jump address in the second slot. We also
12774 // need to save the TOC pointer (R2) to handle jumps between shared
12775 // libraries, and that will be stored in the fourth slot. The thread
12776 // identifier (R13) is not affected.
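// In zero-based, pointer-sized slots the resulting layout is: slot 0 = frame
// address (stored by the front end), slot 1 = jump address (LR), slot 2 = stack
// address (stored by the front end), slot 3 = TOC pointer (R2), slot 4 = base
// pointer.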
12777
12778 // thisMBB:
12779 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12780 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12781 const int64_t BPOffset = 4 * PVT.getStoreSize();
12782
12783 // Prepare the IP in a register.
12784 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12785 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12786 Register BufReg = MI.getOperand(1).getReg();
12787
12788 if (Subtarget.is64BitELFABI()) {
12789 setUsesTOCBasePtr(*MBB->getParent());
12790 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12791 .addReg(PPC::X2)
12792 .addImm(TOCOffset)
12793 .addReg(BufReg)
12794 .cloneMemRefs(MI);
12795 }
12796
12797 // Naked functions never have a base pointer, and so we use r1. For all
12798 // other functions, this decision must be deferred until PEI.
12799 unsigned BaseReg;
12800 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12801 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12802 else
12803 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12804
12805 MIB = BuildMI(*thisMBB, MI, DL,
12806 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12807 .addReg(BaseReg)
12808 .addImm(BPOffset)
12809 .addReg(BufReg)
12810 .cloneMemRefs(MI);
12811
12812 // Setup
12813 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12814 MIB.addRegMask(TRI->getNoPreservedMask());
12815
12816 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12817
12818 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12819 .addMBB(mainMBB);
12820 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12821
12822 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12823 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12824
12825 // mainMBB:
12826 // mainDstReg = 0
12827 MIB =
12828 BuildMI(mainMBB, DL,
12829 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12830
12831 // Store IP
12832 if (Subtarget.isPPC64()) {
12833 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12834 .addReg(LabelReg)
12835 .addImm(LabelOffset)
12836 .addReg(BufReg);
12837 } else {
12838 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12839 .addReg(LabelReg)
12840 .addImm(LabelOffset)
12841 .addReg(BufReg);
12842 }
12843 MIB.cloneMemRefs(MI);
12844
12845 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12846 mainMBB->addSuccessor(sinkMBB);
12847
12848 // sinkMBB:
12849 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12850 TII->get(PPC::PHI), DstReg)
12851 .addReg(mainDstReg).addMBB(mainMBB)
12852 .addReg(restoreDstReg).addMBB(thisMBB);
12853
12854 MI.eraseFromParent();
12855 return sinkMBB;
12856}
12857
12858MachineBasicBlock *
12859PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12860 MachineBasicBlock *MBB) const {
12861 DebugLoc DL = MI.getDebugLoc();
12862 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12863
12864 MachineFunction *MF = MBB->getParent();
12865 MachineRegisterInfo &MRI = MF->getRegInfo();
12866
12867 MVT PVT = getPointerTy(MF->getDataLayout());
12868 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12869 "Invalid Pointer Size!");
12870
12871 const TargetRegisterClass *RC =
12872 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12873 Register Tmp = MRI.createVirtualRegister(RC);
12874 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12875 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12876 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12877 unsigned BP =
12878 (PVT == MVT::i64)
12879 ? PPC::X30
12880 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12881 : PPC::R30);
12882
12883 MachineInstrBuilder MIB;
12884
12885 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12886 const int64_t SPOffset = 2 * PVT.getStoreSize();
12887 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12888 const int64_t BPOffset = 4 * PVT.getStoreSize();
12889
12890 Register BufReg = MI.getOperand(0).getReg();
12891
12892 // Reload FP (the jumped-to function may not have had a
12893 // frame pointer, and if so, then its r31 will be restored
12894 // as necessary).
12895 if (PVT == MVT::i64) {
12896 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12897 .addImm(0)
12898 .addReg(BufReg);
12899 } else {
12900 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12901 .addImm(0)
12902 .addReg(BufReg);
12903 }
12904 MIB.cloneMemRefs(MI);
12905
12906 // Reload IP
12907 if (PVT == MVT::i64) {
12908 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12909 .addImm(LabelOffset)
12910 .addReg(BufReg);
12911 } else {
12912 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12913 .addImm(LabelOffset)
12914 .addReg(BufReg);
12915 }
12916 MIB.cloneMemRefs(MI);
12917
12918 // Reload SP
12919 if (PVT == MVT::i64) {
12920 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12921 .addImm(SPOffset)
12922 .addReg(BufReg);
12923 } else {
12924 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12925 .addImm(SPOffset)
12926 .addReg(BufReg);
12927 }
12928 MIB.cloneMemRefs(MI);
12929
12930 // Reload BP
12931 if (PVT == MVT::i64) {
12932 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12933 .addImm(BPOffset)
12934 .addReg(BufReg);
12935 } else {
12936 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12937 .addImm(BPOffset)
12938 .addReg(BufReg);
12939 }
12940 MIB.cloneMemRefs(MI);
12941
12942 // Reload TOC
12943 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12944 setUsesTOCBasePtr(*MBB->getParent());
12945 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12946 .addImm(TOCOffset)
12947 .addReg(BufReg)
12948 .cloneMemRefs(MI);
12949 }
12950
12951 // Jump
12952 BuildMI(*MBB, MI, DL,
12953 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12954 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12955
12956 MI.eraseFromParent();
12957 return MBB;
12958}
12959
12960bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12961 // If the function specifically requests inline stack probes, emit them.
12962 if (MF.getFunction().hasFnAttribute("probe-stack"))
12963 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12964 "inline-asm";
12965 return false;
12966}
12967
12968unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12969 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12970 unsigned StackAlign = TFI->getStackAlignment();
12971 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12972 "Unexpected stack alignment");
12973 // The default stack probe size is 4096 if the function has no
12974 // stack-probe-size attribute.
12975 const Function &Fn = MF.getFunction();
12976 unsigned StackProbeSize =
12977 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12978 // Round down to the stack alignment.
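// For example, a "stack-probe-size" of 4100 with a 16-byte stack alignment is
// rounded down to 4096; a value smaller than the alignment rounds to zero and
// falls back to StackAlign below.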
12979 StackProbeSize &= ~(StackAlign - 1);
12980 return StackProbeSize ? StackProbeSize : StackAlign;
12981}
12982
12983// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12984// into three phases. In the first phase, it uses the pseudo instruction
12985// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
12986// FinalStackPtr. In the second phase, it generates a loop that probes blocks.
12987// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
12988// of MaxCallFrameSize so that it can compute the correct data area pointer.
12991 MachineBasicBlock *MBB) const {
12992 const bool isPPC64 = Subtarget.isPPC64();
12993 MachineFunction *MF = MBB->getParent();
12994 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12995 DebugLoc DL = MI.getDebugLoc();
12996 const unsigned ProbeSize = getStackProbeSize(*MF);
12997 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12998 MachineRegisterInfo &MRI = MF->getRegInfo();
12999 // The CFG of the stack-probing code looks as follows:
13000 // +-----+
13001 // | MBB |
13002 // +--+--+
13003 // |
13004 // +----v----+
13005 // +--->+ TestMBB +---+
13006 // | +----+----+ |
13007 // | | |
13008 // | +-----v----+ |
13009 // +---+ BlockMBB | |
13010 // +----------+ |
13011 // |
13012 // +---------+ |
13013 // | TailMBB +<--+
13014 // +---------+
13015 // In MBB, calculate previous frame pointer and final stack pointer.
13016 // In TestMBB, test whether sp equals the final stack pointer and, if so, jump
13017 // to TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13018 // TailMBB is spliced via \p MI.
13019 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13020 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13021 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13022
13023 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13024 MF->insert(MBBIter, TestMBB);
13025 MF->insert(MBBIter, BlockMBB);
13026 MF->insert(MBBIter, TailMBB);
13027
13028 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13029 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13030
13031 Register DstReg = MI.getOperand(0).getReg();
13032 Register NegSizeReg = MI.getOperand(1).getReg();
13033 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13034 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13035 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13036 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13037
13038 // Since the value of NegSizeReg might be realigned during prologue/epilogue
13039 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13040 // actual FramePointer and NegSize.
13041 unsigned ProbeOpc;
13042 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13043 ProbeOpc =
13044 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13045 else
13046 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
13047 // and NegSizeReg will be allocated to the same physical register, avoiding a
13048 // redundant copy when the only use of NegSizeReg is the current MI, which
13049 // will then be replaced by PREPARE_PROBED_ALLOCA.
13050 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13051 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13052 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13053 .addDef(ActualNegSizeReg)
13054 .addReg(NegSizeReg)
13055 .add(MI.getOperand(2))
13056 .add(MI.getOperand(3));
13057
13058 // Calculate the final stack pointer, which equals SP + ActualNegSize.
13059 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13060 FinalStackPtr)
13061 .addReg(SPReg)
13062 .addReg(ActualNegSizeReg);
13063
13064 // Materialize a scratch register for update.
13065 int64_t NegProbeSize = -(int64_t)ProbeSize;
13066 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
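// If -ProbeSize does not fit in a signed 16-bit immediate, materialize it with
// a LIS/ORI pair; otherwise a single LI is enough.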
13067 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13068 if (!isInt<16>(NegProbeSize)) {
13069 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13070 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13071 .addImm(NegProbeSize >> 16);
13072 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13073 ScratchReg)
13074 .addReg(TempReg)
13075 .addImm(NegProbeSize & 0xFFFF);
13076 } else
13077 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13078 .addImm(NegProbeSize);
13079
13080 {
13081 // Probing leading residual part.
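// The residual is the remainder of ActualNegSize divided by the (negated)
// probe size; it is probed with one updating store so the loop below only has
// to handle whole ProbeSize-sized blocks.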
13082 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13083 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13084 .addReg(ActualNegSizeReg)
13085 .addReg(ScratchReg);
13086 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13087 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13088 .addReg(Div)
13089 .addReg(ScratchReg);
13090 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13091 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13092 .addReg(Mul)
13093 .addReg(ActualNegSizeReg);
13094 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13095 .addReg(FramePointer)
13096 .addReg(SPReg)
13097 .addReg(NegMod);
13098 }
13099
13100 {
13101 // The remaining part should be a multiple of ProbeSize.
13102 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13103 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13104 .addReg(SPReg)
13105 .addReg(FinalStackPtr);
13106 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13107 .addImm(PPC::PRED_EQ)
13108 .addReg(CmpResult)
13109 .addMBB(TailMBB);
13110 TestMBB->addSuccessor(BlockMBB);
13111 TestMBB->addSuccessor(TailMBB);
13112 }
13113
13114 {
13115 // Touch the block.
13116 // |P...|P...|P...
13117 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13118 .addReg(FramePointer)
13119 .addReg(SPReg)
13120 .addReg(ScratchReg);
13121 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13122 BlockMBB->addSuccessor(TestMBB);
13123 }
13124
13125 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
13126 // use the DYNAREAOFFSET pseudo instruction to get the future result.
13127 Register MaxCallFrameSizeReg =
13128 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13129 BuildMI(TailMBB, DL,
13130 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13131 MaxCallFrameSizeReg)
13132 .add(MI.getOperand(2))
13133 .add(MI.getOperand(3));
13134 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13135 .addReg(SPReg)
13136 .addReg(MaxCallFrameSizeReg);
13137
13138 // Splice instructions after MI to TailMBB.
13139 TailMBB->splice(TailMBB->end(), MBB,
13140 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13141 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13142 MBB->addSuccessor(TestMBB);
13143
13144 // Delete the pseudo instruction.
13145 MI.eraseFromParent();
13146
13147 ++NumDynamicAllocaProbed;
13148 return TailMBB;
13149}
13150
13151static bool IsSelectCC(MachineInstr &MI) {
13152 switch (MI.getOpcode()) {
13153 case PPC::SELECT_CC_I4:
13154 case PPC::SELECT_CC_I8:
13155 case PPC::SELECT_CC_F4:
13156 case PPC::SELECT_CC_F8:
13157 case PPC::SELECT_CC_F16:
13158 case PPC::SELECT_CC_VRRC:
13159 case PPC::SELECT_CC_VSFRC:
13160 case PPC::SELECT_CC_VSSRC:
13161 case PPC::SELECT_CC_VSRC:
13162 case PPC::SELECT_CC_SPE4:
13163 case PPC::SELECT_CC_SPE:
13164 return true;
13165 default:
13166 return false;
13167 }
13168}
13169
13170static bool IsSelect(MachineInstr &MI) {
13171 switch (MI.getOpcode()) {
13172 case PPC::SELECT_I4:
13173 case PPC::SELECT_I8:
13174 case PPC::SELECT_F4:
13175 case PPC::SELECT_F8:
13176 case PPC::SELECT_F16:
13177 case PPC::SELECT_SPE:
13178 case PPC::SELECT_SPE4:
13179 case PPC::SELECT_VRRC:
13180 case PPC::SELECT_VSFRC:
13181 case PPC::SELECT_VSSRC:
13182 case PPC::SELECT_VSRC:
13183 return true;
13184 default:
13185 return false;
13186 }
13187}
13188
13189MachineBasicBlock *
13190PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13191 MachineBasicBlock *BB) const {
13192 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13193 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13194 if (Subtarget.is64BitELFABI() &&
13195 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13196 !Subtarget.isUsingPCRelativeCalls()) {
13197 // Call lowering should have added an r2 operand to indicate a dependence
13198 // on the TOC base pointer value. It can't, however, because there is no
13199 // way to mark the dependence as implicit there, and so the stackmap code
13200 // will confuse it with a regular operand. Instead, add the dependence
13201 // here.
13202 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13203 }
13204
13205 return emitPatchPoint(MI, BB);
13206 }
13207
13208 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13209 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13210 return emitEHSjLjSetJmp(MI, BB);
13211 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13212 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13213 return emitEHSjLjLongJmp(MI, BB);
13214 }
13215
13216 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13217
13218 // To "insert" these instructions we actually have to insert their
13219 // control-flow patterns.
13220 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13221 MachineFunction::iterator It = ++BB->getIterator();
13222
13223 MachineFunction *F = BB->getParent();
13224 MachineRegisterInfo &MRI = F->getRegInfo();
13225
13226 if (Subtarget.hasISEL() &&
13227 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13228 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13229 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13230 SmallVector<MachineOperand, 2> Cond;
13231 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13232 MI.getOpcode() == PPC::SELECT_CC_I8)
13233 Cond.push_back(MI.getOperand(4));
13234 else
13235 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13236 Cond.push_back(MI.getOperand(1));
13237
13238 DebugLoc dl = MI.getDebugLoc();
13239 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13240 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13241 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13242 // The incoming instruction knows the destination vreg to set, the
13243 // condition code register to branch on, the true/false values to
13244 // select between, and a branch opcode to use.
13245
13246 // thisMBB:
13247 // ...
13248 // TrueVal = ...
13249 // cmpTY ccX, r1, r2
13250 // bCC sinkMBB
13251 // fallthrough --> copy0MBB
13252 MachineBasicBlock *thisMBB = BB;
13253 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13254 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13255 DebugLoc dl = MI.getDebugLoc();
13256 F->insert(It, copy0MBB);
13257 F->insert(It, sinkMBB);
13258
13259 // Set the call frame size on entry to the new basic blocks.
13260 // See https://reviews.llvm.org/D156113.
13261 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13262 copy0MBB->setCallFrameSize(CallFrameSize);
13263 sinkMBB->setCallFrameSize(CallFrameSize);
13264
13265 // Transfer the remainder of BB and its successor edges to sinkMBB.
13266 sinkMBB->splice(sinkMBB->begin(), BB,
13267 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13268 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13269
13270 // Next, add the true and fallthrough blocks as its successors.
13271 BB->addSuccessor(copy0MBB);
13272 BB->addSuccessor(sinkMBB);
13273
13274 if (IsSelect(MI)) {
13275 BuildMI(BB, dl, TII->get(PPC::BC))
13276 .addReg(MI.getOperand(1).getReg())
13277 .addMBB(sinkMBB);
13278 } else {
13279 unsigned SelectPred = MI.getOperand(4).getImm();
13280 BuildMI(BB, dl, TII->get(PPC::BCC))
13281 .addImm(SelectPred)
13282 .addReg(MI.getOperand(1).getReg())
13283 .addMBB(sinkMBB);
13284 }
13285
13286 // copy0MBB:
13287 // %FalseValue = ...
13288 // # fallthrough to sinkMBB
13289 BB = copy0MBB;
13290
13291 // Update machine-CFG edges
13292 BB->addSuccessor(sinkMBB);
13293
13294 // sinkMBB:
13295 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13296 // ...
13297 BB = sinkMBB;
13298 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13299 .addReg(MI.getOperand(3).getReg())
13300 .addMBB(copy0MBB)
13301 .addReg(MI.getOperand(2).getReg())
13302 .addMBB(thisMBB);
13303 } else if (MI.getOpcode() == PPC::ReadTB) {
13304 // To read the 64-bit time-base register on a 32-bit target, we read the
13305 // two halves. Should the counter have wrapped while it was being read, we
13306 // need to try again.
13307 // ...
13308 // readLoop:
13309 // mfspr Rx,TBU # load from TBU
13310 // mfspr Ry,TB # load from TB
13311 // mfspr Rz,TBU # load from TBU
13312 // cmpw crX,Rx,Rz # check if 'old'='new'
13313 // bne readLoop # branch if they're not equal
13314 // ...
13315
13316 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13317 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13318 DebugLoc dl = MI.getDebugLoc();
13319 F->insert(It, readMBB);
13320 F->insert(It, sinkMBB);
13321
13322 // Transfer the remainder of BB and its successor edges to sinkMBB.
13323 sinkMBB->splice(sinkMBB->begin(), BB,
13324 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13325 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13326
13327 BB->addSuccessor(readMBB);
13328 BB = readMBB;
13329
13330 MachineRegisterInfo &RegInfo = F->getRegInfo();
13331 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13332 Register LoReg = MI.getOperand(0).getReg();
13333 Register HiReg = MI.getOperand(1).getReg();
13334
13335 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13336 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13337 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13338
13339 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13340
13341 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13342 .addReg(HiReg)
13343 .addReg(ReadAgainReg);
13344 BuildMI(BB, dl, TII->get(PPC::BCC))
13345 .addImm(PPC::PRED_NE)
13346 .addReg(CmpReg)
13347 .addMBB(readMBB);
13348
13349 BB->addSuccessor(readMBB);
13350 BB->addSuccessor(sinkMBB);
13351 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13352 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13353 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13354 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13355 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13356 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13357 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13358 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13359
13360 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13361 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13362 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13363 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13364 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13365 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13366 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13367 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13368
13369 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13370 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13371 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13372 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13373 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13374 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13375 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13376 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13377
13378 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13379 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13380 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13381 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13382 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13383 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13384 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13385 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13386
13387 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13388 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13389 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13390 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13391 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13392 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13393 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13394 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13395
13396 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13397 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13398 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13399 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13400 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13401 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13402 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13403 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13404
13405 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13406 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13407 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13408 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13409 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13410 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13411 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13412 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13413
13414 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13415 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13416 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13417 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13418 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13419 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13420 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13421 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13422
13423 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13424 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13425 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13426 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13427 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13428 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13429 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13430 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13431
13432 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13433 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13434 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13435 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13436 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13437 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13438 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13439 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13440
13441 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13442 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13443 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13444 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13445 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13446 BB = EmitAtomicBinary(MI, BB, 4, 0);
13447 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13448 BB = EmitAtomicBinary(MI, BB, 8, 0);
13449 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13450 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13451 (Subtarget.hasPartwordAtomics() &&
13452 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13453 (Subtarget.hasPartwordAtomics() &&
13454 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13455 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13456
13457 auto LoadMnemonic = PPC::LDARX;
13458 auto StoreMnemonic = PPC::STDCX;
13459 switch (MI.getOpcode()) {
13460 default:
13461 llvm_unreachable("Compare and swap of unknown size");
13462 case PPC::ATOMIC_CMP_SWAP_I8:
13463 LoadMnemonic = PPC::LBARX;
13464 StoreMnemonic = PPC::STBCX;
13465 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13466 break;
13467 case PPC::ATOMIC_CMP_SWAP_I16:
13468 LoadMnemonic = PPC::LHARX;
13469 StoreMnemonic = PPC::STHCX;
13470 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13471 break;
13472 case PPC::ATOMIC_CMP_SWAP_I32:
13473 LoadMnemonic = PPC::LWARX;
13474 StoreMnemonic = PPC::STWCX;
13475 break;
13476 case PPC::ATOMIC_CMP_SWAP_I64:
13477 LoadMnemonic = PPC::LDARX;
13478 StoreMnemonic = PPC::STDCX;
13479 break;
13480 }
13481 MachineRegisterInfo &RegInfo = F->getRegInfo();
13482 Register dest = MI.getOperand(0).getReg();
13483 Register ptrA = MI.getOperand(1).getReg();
13484 Register ptrB = MI.getOperand(2).getReg();
13485 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13486 Register oldval = MI.getOperand(3).getReg();
13487 Register newval = MI.getOperand(4).getReg();
13488 DebugLoc dl = MI.getDebugLoc();
13489
13490 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13491 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13492 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13493 F->insert(It, loop1MBB);
13494 F->insert(It, loop2MBB);
13495 F->insert(It, exitMBB);
13496 exitMBB->splice(exitMBB->begin(), BB,
13497 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13498 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13499
13500 // thisMBB:
13501 // ...
13502 // fallthrough --> loopMBB
13503 BB->addSuccessor(loop1MBB);
13504
13505 // loop1MBB:
13506 // l[bhwd]arx dest, ptr
13507 // cmp[wd] dest, oldval
13508 // bne- exitBB
13509 // loop2MBB:
13510 // st[bhwd]cx. newval, ptr
13511 // bne- loopMBB
13512 // b exitBB
13513 // exitBB:
13514 BB = loop1MBB;
13515 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13516 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13517 .addReg(dest)
13518 .addReg(oldval);
13519 BuildMI(BB, dl, TII->get(PPC::BCC))
13520 .addImm(PPC::PRED_NE)
13521 .addReg(CrReg)
13522 .addMBB(exitMBB);
13523 BB->addSuccessor(loop2MBB);
13524 BB->addSuccessor(exitMBB);
13525
13526 BB = loop2MBB;
13527 BuildMI(BB, dl, TII->get(StoreMnemonic))
13528 .addReg(newval)
13529 .addReg(ptrA)
13530 .addReg(ptrB);
13531 BuildMI(BB, dl, TII->get(PPC::BCC))
13532 .addImm(PPC::PRED_NE)
13533 .addReg(PPC::CR0)
13534 .addMBB(loop1MBB);
13535 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13536 BB->addSuccessor(loop1MBB);
13537 BB->addSuccessor(exitMBB);
13538
13539 // exitMBB:
13540 // ...
13541 BB = exitMBB;
13542 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13543 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13544 // We must use 64-bit registers for addresses when targeting 64-bit,
13545 // since we're actually doing arithmetic on them. Other registers
13546 // can be 32-bit.
13547 bool is64bit = Subtarget.isPPC64();
13548 bool isLittleEndian = Subtarget.isLittleEndian();
13549 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13550
13551 Register dest = MI.getOperand(0).getReg();
13552 Register ptrA = MI.getOperand(1).getReg();
13553 Register ptrB = MI.getOperand(2).getReg();
13554 Register oldval = MI.getOperand(3).getReg();
13555 Register newval = MI.getOperand(4).getReg();
13556 DebugLoc dl = MI.getDebugLoc();
13557
13558 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13559 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13560 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13561 F->insert(It, loop1MBB);
13562 F->insert(It, loop2MBB);
13563 F->insert(It, exitMBB);
13564 exitMBB->splice(exitMBB->begin(), BB,
13565 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13566 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13567
13568 MachineRegisterInfo &RegInfo = F->getRegInfo();
13569 const TargetRegisterClass *RC =
13570 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13571 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13572
13573 Register PtrReg = RegInfo.createVirtualRegister(RC);
13574 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13575 Register ShiftReg =
13576 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13577 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13578 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13579 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13580 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13581 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13582 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13583 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13584 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13585 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13586 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13587 Register Ptr1Reg;
13588 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13589 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13590 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13591 // thisMBB:
13592 // ...
13593 // fallthrough --> loopMBB
13594 BB->addSuccessor(loop1MBB);
13595
13596 // The 4-byte load must be aligned, while a char or short may be
13597 // anywhere in the word. Hence all this nasty bookkeeping code.
13598 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13599 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13600 // xori shift, shift1, 24 [16]
13601 // rlwinm ptr, ptr1, 0, 0, 29
13602 // slw newval2, newval, shift
13603 // slw oldval2, oldval, shift
13604 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13605 // slw mask, mask2, shift
13606 // and newval3, newval2, mask
13607 // and oldval3, oldval2, mask
13608 // loop1MBB:
13609 // lwarx tmpDest, ptr
13610 // and tmp, tmpDest, mask
13611 // cmpw tmp, oldval3
13612 // bne- exitBB
13613 // loop2MBB:
13614 // andc tmp2, tmpDest, mask
13615 // or tmp4, tmp2, newval3
13616 // stwcx. tmp4, ptr
13617 // bne- loop1MBB
13618 // b exitBB
13619 // exitBB:
13620 // srw dest, tmpDest, shift
13621 if (ptrA != ZeroReg) {
13622 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13623 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13624 .addReg(ptrA)
13625 .addReg(ptrB);
13626 } else {
13627 Ptr1Reg = ptrB;
13628 }
13629
13630 // We need to use a 32-bit subregister to avoid a register class mismatch in
13631 // 64-bit mode.
13632 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13633 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13634 .addImm(3)
13635 .addImm(27)
13636 .addImm(is8bit ? 28 : 27);
13637 if (!isLittleEndian)
13638 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13639 .addReg(Shift1Reg)
13640 .addImm(is8bit ? 24 : 16);
13641 if (is64bit)
13642 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13643 .addReg(Ptr1Reg)
13644 .addImm(0)
13645 .addImm(61);
13646 else
13647 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13648 .addReg(Ptr1Reg)
13649 .addImm(0)
13650 .addImm(0)
13651 .addImm(29);
13652 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13653 .addReg(newval)
13654 .addReg(ShiftReg);
13655 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13656 .addReg(oldval)
13657 .addReg(ShiftReg);
13658 if (is8bit)
13659 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13660 else {
13661 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13662 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13663 .addReg(Mask3Reg)
13664 .addImm(65535);
13665 }
13666 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13667 .addReg(Mask2Reg)
13668 .addReg(ShiftReg);
13669 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13670 .addReg(NewVal2Reg)
13671 .addReg(MaskReg);
13672 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13673 .addReg(OldVal2Reg)
13674 .addReg(MaskReg);
13675
13676 BB = loop1MBB;
13677 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13678 .addReg(ZeroReg)
13679 .addReg(PtrReg);
13680 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13681 .addReg(TmpDestReg)
13682 .addReg(MaskReg);
13683 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13684 .addReg(TmpReg)
13685 .addReg(OldVal3Reg);
13686 BuildMI(BB, dl, TII->get(PPC::BCC))
13687 .addImm(PPC::PRED_NE)
13688 .addReg(CrReg)
13689 .addMBB(exitMBB);
13690 BB->addSuccessor(loop2MBB);
13691 BB->addSuccessor(exitMBB);
13692
13693 BB = loop2MBB;
13694 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13695 .addReg(TmpDestReg)
13696 .addReg(MaskReg);
13697 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13698 .addReg(Tmp2Reg)
13699 .addReg(NewVal3Reg);
13700 BuildMI(BB, dl, TII->get(PPC::STWCX))
13701 .addReg(Tmp4Reg)
13702 .addReg(ZeroReg)
13703 .addReg(PtrReg);
13704 BuildMI(BB, dl, TII->get(PPC::BCC))
13705 .addImm(PPC::PRED_NE)
13706 .addReg(PPC::CR0)
13707 .addMBB(loop1MBB);
13708 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13709 BB->addSuccessor(loop1MBB);
13710 BB->addSuccessor(exitMBB);
13711
13712 // exitMBB:
13713 // ...
13714 BB = exitMBB;
13715 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13716 .addReg(TmpReg)
13717 .addReg(ShiftReg);
13718 } else if (MI.getOpcode() == PPC::FADDrtz) {
13719 // This pseudo performs an FADD with rounding mode temporarily forced
13720 // to round-to-zero. We emit this via custom inserter since the FPSCR
13721 // is not modeled at the SelectionDAG level.
13722 Register Dest = MI.getOperand(0).getReg();
13723 Register Src1 = MI.getOperand(1).getReg();
13724 Register Src2 = MI.getOperand(2).getReg();
13725 DebugLoc dl = MI.getDebugLoc();
13726
13727 MachineRegisterInfo &RegInfo = F->getRegInfo();
13728 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13729
13730 // Save FPSCR value.
13731 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13732
13733 // Set rounding mode to round-to-zero.
13734 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13735 .addImm(31)
13736 .addReg(PPC::RM, RegState::ImplicitDefine);
13737
13738 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13739 .addImm(30)
13740 .addReg(PPC::RM, RegState::ImplicitDefine);
13741
13742 // Perform addition.
13743 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13744 .addReg(Src1)
13745 .addReg(Src2);
13746 if (MI.getFlag(MachineInstr::NoFPExcept))
13747 MIB.setFlag(MachineInstr::NoFPExcept);
13748
13749 // Restore FPSCR value.
13750 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13751 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13752 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13753 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13754 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13755 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13756 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13757 ? PPC::ANDI8_rec
13758 : PPC::ANDI_rec;
13759 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13760 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13761
13762 MachineRegisterInfo &RegInfo = F->getRegInfo();
13763 Register Dest = RegInfo.createVirtualRegister(
13764 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13765
13766 DebugLoc Dl = MI.getDebugLoc();
13767 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13768 .addReg(MI.getOperand(1).getReg())
13769 .addImm(1);
13770 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13771 MI.getOperand(0).getReg())
13772 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13773 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13774 DebugLoc Dl = MI.getDebugLoc();
13775 MachineRegisterInfo &RegInfo = F->getRegInfo();
13776 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13777 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13778 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13779 MI.getOperand(0).getReg())
13780 .addReg(CRReg);
13781 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13782 DebugLoc Dl = MI.getDebugLoc();
13783 unsigned Imm = MI.getOperand(1).getImm();
13784 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13785 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13786 MI.getOperand(0).getReg())
13787 .addReg(PPC::CR0EQ);
13788 } else if (MI.getOpcode() == PPC::SETRNDi) {
13789 DebugLoc dl = MI.getDebugLoc();
13790 Register OldFPSCRReg = MI.getOperand(0).getReg();
13791
13792 // Save FPSCR value.
13793 if (MRI.use_empty(OldFPSCRReg))
13794 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13795 else
13796 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13797
13798 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
13799 // the following settings:
13800 // 00 Round to nearest
13801 // 01 Round to 0
13802 // 10 Round to +inf
13803 // 11 Round to -inf
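// For example, an immediate of 0 leaves both bits clear (round to nearest),
// while 3 sets both bits (round to -inf).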
13804
13805 // When the operand is an immediate, use its two least significant bits to
13806 // set bits 62:63 of the FPSCR.
13807 unsigned Mode = MI.getOperand(1).getImm();
13808 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13809 .addImm(31)
13810 .addReg(PPC::RM, RegState::ImplicitDefine);
13811
13812 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13813 .addImm(30)
13814 .addReg(PPC::RM, RegState::ImplicitDefine);
13815 } else if (MI.getOpcode() == PPC::SETRND) {
13816 DebugLoc dl = MI.getDebugLoc();
13817
13818 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13819 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13820 // If the target doesn't have DirectMove, we use the stack to do the
13821 // conversion, because the target doesn't have instructions like mtvsrd
13822 // or mfvsrd to do this conversion directly.
13823 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13824 if (Subtarget.hasDirectMove()) {
13825 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13826 .addReg(SrcReg);
13827 } else {
13828 // Use stack to do the register copy.
13829 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13830 MachineRegisterInfo &RegInfo = F->getRegInfo();
13831 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13832 if (RC == &PPC::F8RCRegClass) {
13833 // Copy register from F8RCRegClass to G8RCRegClass.
13834 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13835 "Unsupported RegClass.");
13836
13837 StoreOp = PPC::STFD;
13838 LoadOp = PPC::LD;
13839 } else {
13840 // Copy register from G8RCRegClass to F8RCRegClass.
13841 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13842 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13843 "Unsupported RegClass.");
13844 }
13845
13846 MachineFrameInfo &MFI = F->getFrameInfo();
13847 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13848
13849 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13850 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13851 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13852 MFI.getObjectAlign(FrameIdx));
13853
13854 // Store the SrcReg into the stack.
13855 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13856 .addReg(SrcReg)
13857 .addImm(0)
13858 .addFrameIndex(FrameIdx)
13859 .addMemOperand(MMOStore);
13860
13861 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13862 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13863 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13864 MFI.getObjectAlign(FrameIdx));
13865
13866 // Load from the stack where SrcReg is stored, and save to DestReg,
13867 // so we have done the RegClass conversion from RegClass::SrcReg to
13868 // RegClass::DestReg.
13869 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13870 .addImm(0)
13871 .addFrameIndex(FrameIdx)
13872 .addMemOperand(MMOLoad);
13873 }
13874 };
13875
13876 Register OldFPSCRReg = MI.getOperand(0).getReg();
13877
13878 // Save FPSCR value.
13879 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13880
13881 // When the operand is a GPRC register, use its two least significant bits and
13882 // the mtfsf instruction to set bits 62:63 of the FPSCR.
13883 //
13884 // copy OldFPSCRTmpReg, OldFPSCRReg
13885 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13886 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13887 // copy NewFPSCRReg, NewFPSCRTmpReg
13888 // mtfsf 255, NewFPSCRReg
13889 MachineOperand SrcOp = MI.getOperand(1);
13890 MachineRegisterInfo &RegInfo = F->getRegInfo();
13891 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13892
13893 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13894
13895 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13896 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13897
13898 // The first operand of INSERT_SUBREG should be a register that has
13899 // subregisters. We only care about its RegClass, so we use an
13900 // IMPLICIT_DEF register.
13901 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13902 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13903 .addReg(ImDefReg)
13904 .add(SrcOp)
13905 .addImm(1);
13906
13907 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13908 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13909 .addReg(OldFPSCRTmpReg)
13910 .addReg(ExtSrcReg)
13911 .addImm(0)
13912 .addImm(62);
13913
13914 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13915 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13916
13917 // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits 32:63
13918 // of the FPSCR.
13919 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13920 .addImm(255)
13921 .addReg(NewFPSCRReg)
13922 .addImm(0)
13923 .addImm(0);
13924 } else if (MI.getOpcode() == PPC::SETFLM) {
13925 DebugLoc Dl = MI.getDebugLoc();
13926
13927 // Result of setflm is previous FPSCR content, so we need to save it first.
13928 Register OldFPSCRReg = MI.getOperand(0).getReg();
13929 if (MRI.use_empty(OldFPSCRReg))
13930 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13931 else
13932 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13933
13934 // Put bits 32:63 into the FPSCR.
13935 Register NewFPSCRReg = MI.getOperand(1).getReg();
13936 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13937 .addImm(255)
13938 .addReg(NewFPSCRReg)
13939 .addImm(0)
13940 .addImm(0);
13941 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13942 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13943 return emitProbedAlloca(MI, BB);
13944 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13945 DebugLoc DL = MI.getDebugLoc();
13946 Register Src = MI.getOperand(2).getReg();
13947 Register Lo = MI.getOperand(0).getReg();
13948 Register Hi = MI.getOperand(1).getReg();
13949 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13950 .addDef(Lo)
13951 .addUse(Src, 0, PPC::sub_gp8_x1);
13952 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13953 .addDef(Hi)
13954 .addUse(Src, 0, PPC::sub_gp8_x0);
13955 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13956 MI.getOpcode() == PPC::STQX_PSEUDO) {
13957 DebugLoc DL = MI.getDebugLoc();
13958 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
13959 // holds the sum of RA and RB,
13960 // so it has to be g8rc_and_g8rc_nox0.
13961 Register Ptr =
13962 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13963 Register Val = MI.getOperand(0).getReg();
13964 Register RA = MI.getOperand(1).getReg();
13965 Register RB = MI.getOperand(2).getReg();
13966 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13967 BuildMI(*BB, MI, DL,
13968 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13969 : TII->get(PPC::STQ))
13970 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13971 .addImm(0)
13972 .addReg(Ptr);
13973 } else {
13974 llvm_unreachable("Unexpected instr type to insert");
13975 }
13976
13977 MI.eraseFromParent(); // The pseudo instruction is gone now.
13978 return BB;
13979}
13980
13981//===----------------------------------------------------------------------===//
13982// Target Optimization Hooks
13983//===----------------------------------------------------------------------===//
13984
13985static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13986 // For the estimates, convergence is quadratic, so we essentially double the
13987 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13988 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13989 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
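// For example, with hasRecipPrec() a single refinement step takes 2^-14 to
// roughly 2^-28, which already covers an f32 significand; f64 gets one extra
// step below.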
13990 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13991 if (VT.getScalarType() == MVT::f64)
13992 RefinementSteps++;
13993 return RefinementSteps;
13994}
13995
13996SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13997 const DenormalMode &Mode) const {
13998 // We only have VSX Vector Test for software Square Root.
13999 EVT VT = Op.getValueType();
14000 if (!isTypeLegal(MVT::i1) ||
14001 (VT != MVT::f64 &&
14002 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14003 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14004
14005 SDLoc DL(Op);
14006 // The output register of FTSQRT is a CR field.
14007 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14008 // ftsqrt BF,FRB
14009 // Let e_b be the unbiased exponent of the double-precision
14010 // floating-point operand in register FRB.
14011 // fe_flag is set to 1 if either of the following conditions occurs.
14012 // - The double-precision floating-point operand in register FRB is a zero,
14013 // a NaN, or an infinity, or a negative value.
14014 // - e_b is less than or equal to -970.
14015 // Otherwise fe_flag is set to 0.
14016 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14017 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14018 // exponent is less than -970)
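// The EQ bit extracted below is therefore an i1 that is true exactly when the
// operand is not eligible for the estimate-and-refine path, so the caller can
// select the fallback square-root result instead.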
14019 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14020 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14021 FTSQRT, SRIdxVal),
14022 0);
14023}
14024
14025SDValue
14026PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14027 SelectionDAG &DAG) const {
14028 // We only have VSX Vector Square Root.
14029 EVT VT = Op.getValueType();
14030 if (VT != MVT::f64 &&
14031 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14032 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14033
14034 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14035}
14036
14037SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14038 int Enabled, int &RefinementSteps,
14039 bool &UseOneConstNR,
14040 bool Reciprocal) const {
14041 EVT VT = Operand.getValueType();
14042 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14043 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14044 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14045 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14046 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14047 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14048
14049 // The Newton-Raphson computation with a single constant does not provide
14050 // enough accuracy on some CPUs.
14051 UseOneConstNR = !Subtarget.needsTwoConstNR();
14052 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14053 }
14054 return SDValue();
14055}
14056
14057SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14058 int Enabled,
14059 int &RefinementSteps) const {
14060 EVT VT = Operand.getValueType();
14061 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14062 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14063 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14064 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14065 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14066 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14067 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14068 }
14069 return SDValue();
14070}
14071
14072unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14073 // Note: This functionality is used only when unsafe-fp-math is enabled, and
14074 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
14075 // enabled for division), this functionality is redundant with the default
14076 // combiner logic (once the division -> reciprocal/multiply transformation
14077 // has taken place). As a result, this matters more for older cores than for
14078 // newer ones.
14079
14080 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14081 // reciprocal if there are two or more FDIVs (for embedded cores with only
14082   // one FP pipeline) or three or more FDIVs (for generic OOO cores).
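  // For example, with unsafe-fp-math on a core with reciprocal estimates:
  //   a = x / d; b = y / d; c = z / d
  // becomes
  //   r = 1.0 / d; a = x * r; b = y * r; c = z * r
  // once the number of divisions by d reaches the threshold returned here.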
14083 switch (Subtarget.getCPUDirective()) {
14084 default:
14085 return 3;
14086 case PPC::DIR_440:
14087 case PPC::DIR_A2:
14088 case PPC::DIR_E500:
14089 case PPC::DIR_E500mc:
14090 case PPC::DIR_E5500:
14091 return 2;
14092 }
14093}
14094
14095// isConsecutiveLSLoc needs to work even if all adds have not yet been
14096// collapsed, and so we need to look through chains of them.
14097 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14098                                       int64_t& Offset, SelectionDAG &DAG) {
14099 if (DAG.isBaseWithConstantOffset(Loc)) {
14100 Base = Loc.getOperand(0);
14101 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14102
14103 // The base might itself be a base plus an offset, and if so, accumulate
14104 // that as well.
14105     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14106
14107}
14108
14109 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14110                                unsigned Bytes, int Dist,
14111 SelectionDAG &DAG) {
14112 if (VT.getSizeInBits() / 8 != Bytes)
14113 return false;
14114
14115 SDValue BaseLoc = Base->getBasePtr();
14116 if (Loc.getOpcode() == ISD::FrameIndex) {
14117 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14118 return false;
14119     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14120     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
14121 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14122 int FS = MFI.getObjectSize(FI);
14123 int BFS = MFI.getObjectSize(BFI);
14124 if (FS != BFS || FS != (int)Bytes) return false;
14125 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14126 }
14127
14128 SDValue Base1 = Loc, Base2 = BaseLoc;
14129 int64_t Offset1 = 0, Offset2 = 0;
14130 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14131 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14132 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14133 return true;
14134
14135 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14136 const GlobalValue *GV1 = nullptr;
14137 const GlobalValue *GV2 = nullptr;
14138 Offset1 = 0;
14139 Offset2 = 0;
14140 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14141 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14142 if (isGA1 && isGA2 && GV1 == GV2)
14143 return Offset1 == (Offset2 + Dist*Bytes);
14144 return false;
14145}
14146
14147// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14148// not enforce equality of the chain operands.
14149 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14150                             unsigned Bytes, int Dist,
14151 SelectionDAG &DAG) {
14152 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14153 EVT VT = LS->getMemoryVT();
14154 SDValue Loc = LS->getBasePtr();
14155 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14156 }
14157
14158 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14159 EVT VT;
14160 switch (N->getConstantOperandVal(1)) {
14161 default: return false;
14162 case Intrinsic::ppc_altivec_lvx:
14163 case Intrinsic::ppc_altivec_lvxl:
14164 case Intrinsic::ppc_vsx_lxvw4x:
14165 case Intrinsic::ppc_vsx_lxvw4x_be:
14166 VT = MVT::v4i32;
14167 break;
14168 case Intrinsic::ppc_vsx_lxvd2x:
14169 case Intrinsic::ppc_vsx_lxvd2x_be:
14170 VT = MVT::v2f64;
14171 break;
14172 case Intrinsic::ppc_altivec_lvebx:
14173 VT = MVT::i8;
14174 break;
14175 case Intrinsic::ppc_altivec_lvehx:
14176 VT = MVT::i16;
14177 break;
14178 case Intrinsic::ppc_altivec_lvewx:
14179 VT = MVT::i32;
14180 break;
14181 }
14182
14183 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14184 }
14185
14186 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14187 EVT VT;
14188 switch (N->getConstantOperandVal(1)) {
14189 default: return false;
14190 case Intrinsic::ppc_altivec_stvx:
14191 case Intrinsic::ppc_altivec_stvxl:
14192 case Intrinsic::ppc_vsx_stxvw4x:
14193 VT = MVT::v4i32;
14194 break;
14195 case Intrinsic::ppc_vsx_stxvd2x:
14196 VT = MVT::v2f64;
14197 break;
14198 case Intrinsic::ppc_vsx_stxvw4x_be:
14199 VT = MVT::v4i32;
14200 break;
14201 case Intrinsic::ppc_vsx_stxvd2x_be:
14202 VT = MVT::v2f64;
14203 break;
14204 case Intrinsic::ppc_altivec_stvebx:
14205 VT = MVT::i8;
14206 break;
14207 case Intrinsic::ppc_altivec_stvehx:
14208 VT = MVT::i16;
14209 break;
14210 case Intrinsic::ppc_altivec_stvewx:
14211 VT = MVT::i32;
14212 break;
14213 }
14214
14215 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14216 }
14217
14218 return false;
14219}
14220
14221 // Return true if there is a nearby consecutive load to the one provided
14222 // (regardless of alignment). We search up and down the chain, looking through
14223// token factors and other loads (but nothing else). As a result, a true result
14224// indicates that it is safe to create a new consecutive load adjacent to the
14225// load provided.
14226 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14227   SDValue Chain = LD->getChain();
14228 EVT VT = LD->getMemoryVT();
14229
14230 SmallSet<SDNode *, 16> LoadRoots;
14231 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14232 SmallSet<SDNode *, 16> Visited;
14233
14234 // First, search up the chain, branching to follow all token-factor operands.
14235 // If we find a consecutive load, then we're done, otherwise, record all
14236 // nodes just above the top-level loads and token factors.
14237 while (!Queue.empty()) {
14238 SDNode *ChainNext = Queue.pop_back_val();
14239 if (!Visited.insert(ChainNext).second)
14240 continue;
14241
14242 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14243 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14244 return true;
14245
14246 if (!Visited.count(ChainLD->getChain().getNode()))
14247 Queue.push_back(ChainLD->getChain().getNode());
14248 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14249 for (const SDUse &O : ChainNext->ops())
14250 if (!Visited.count(O.getNode()))
14251 Queue.push_back(O.getNode());
14252 } else
14253 LoadRoots.insert(ChainNext);
14254 }
14255
14256 // Second, search down the chain, starting from the top-level nodes recorded
14257 // in the first phase. These top-level nodes are the nodes just above all
14258   // loads and token factors. Starting with their uses, recursively look through
14259 // all loads (just the chain uses) and token factors to find a consecutive
14260 // load.
14261 Visited.clear();
14262 Queue.clear();
14263
14264 for (SDNode *I : LoadRoots) {
14265 Queue.push_back(I);
14266
14267 while (!Queue.empty()) {
14268 SDNode *LoadRoot = Queue.pop_back_val();
14269 if (!Visited.insert(LoadRoot).second)
14270 continue;
14271
14272 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14273 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14274 return true;
14275
14276 for (SDNode *U : LoadRoot->users())
14277 if (((isa<MemSDNode>(U) &&
14278 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14279 U->getOpcode() == ISD::TokenFactor) &&
14280 !Visited.count(U))
14281 Queue.push_back(U);
14282 }
14283 }
14284
14285 return false;
14286}
14287
14288/// This function is called when we have proved that a SETCC node can be replaced
14289/// by subtraction (and other supporting instructions) so that the result of
14290/// comparison is kept in a GPR instead of CR. This function is purely for
14291/// codegen purposes and has some flags to guide the codegen process.
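/// For example, with 32-bit operands and a 64-bit largest legal integer type,
/// (setult %a, %b) becomes roughly
///   (trunc (srl (sub (zext %a), (zext %b)), 63)),
/// i.e. the sign bit of the 64-bit difference; Complement xors the result
/// with 1 and Swap exchanges the operands to cover the other unsigned
/// predicates.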
14292static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14293 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14294 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14295
14296 // Zero extend the operands to the largest legal integer. Originally, they
14297 // must be of a strictly smaller size.
14298 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14299 DAG.getConstant(Size, DL, MVT::i32));
14300 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14301 DAG.getConstant(Size, DL, MVT::i32));
14302
14303 // Swap if needed. Depends on the condition code.
14304 if (Swap)
14305 std::swap(Op0, Op1);
14306
14307 // Subtract extended integers.
14308 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14309
14310 // Move the sign bit to the least significant position and zero out the rest.
14311 // Now the least significant bit carries the result of original comparison.
14312 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14313 DAG.getConstant(Size - 1, DL, MVT::i32));
14314 auto Final = Shifted;
14315
14316 // Complement the result if needed. Based on the condition code.
14317 if (Complement)
14318 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14319 DAG.getConstant(1, DL, MVT::i64));
14320
14321 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14322}
14323
14324SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14325 DAGCombinerInfo &DCI) const {
14326 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14327
14328 SelectionDAG &DAG = DCI.DAG;
14329 SDLoc DL(N);
14330
14331   // The size of the integers being compared plays a critical role in the following
14332 // analysis, so we prefer to do this when all types are legal.
14333 if (!DCI.isAfterLegalizeDAG())
14334 return SDValue();
14335
14336   // If all users of SETCC extend its value to a legal integer type,
14337   // then we replace SETCC with a subtraction.
14338 for (const SDNode *U : N->users())
14339 if (U->getOpcode() != ISD::ZERO_EXTEND)
14340 return SDValue();
14341
14342 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14343 auto OpSize = N->getOperand(0).getValueSizeInBits();
14344
14345   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14346
14347 if (OpSize < Size) {
14348 switch (CC) {
14349 default: break;
14350 case ISD::SETULT:
14351 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14352 case ISD::SETULE:
14353 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14354 case ISD::SETUGT:
14355 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14356 case ISD::SETUGE:
14357 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14358 }
14359 }
14360
14361 return SDValue();
14362}
14363
14364SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14365 DAGCombinerInfo &DCI) const {
14366 SelectionDAG &DAG = DCI.DAG;
14367 SDLoc dl(N);
14368
14369 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14370 // If we're tracking CR bits, we need to be careful that we don't have:
14371 // trunc(binary-ops(zext(x), zext(y)))
14372 // or
14373   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
14374 // such that we're unnecessarily moving things into GPRs when it would be
14375 // better to keep them in CR bits.
14376
14377 // Note that trunc here can be an actual i1 trunc, or can be the effective
14378 // truncation that comes from a setcc or select_cc.
14379 if (N->getOpcode() == ISD::TRUNCATE &&
14380 N->getValueType(0) != MVT::i1)
14381 return SDValue();
14382
14383 if (N->getOperand(0).getValueType() != MVT::i32 &&
14384 N->getOperand(0).getValueType() != MVT::i64)
14385 return SDValue();
14386
14387 if (N->getOpcode() == ISD::SETCC ||
14388 N->getOpcode() == ISD::SELECT_CC) {
14389 // If we're looking at a comparison, then we need to make sure that the
14390     // high bits (all except for the first) don't affect the result.
14391     ISD::CondCode CC =
14392       cast<CondCodeSDNode>(N->getOperand(
14393 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14394 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14395
14396     if (ISD::isSignedIntSetCC(CC)) {
14397       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14398 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14399 return SDValue();
14400 } else if (ISD::isUnsignedIntSetCC(CC)) {
14401 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14402 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14403 !DAG.MaskedValueIsZero(N->getOperand(1),
14404 APInt::getHighBitsSet(OpBits, OpBits-1)))
14405 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14406 : SDValue());
14407 } else {
14408 // This is neither a signed nor an unsigned comparison, just make sure
14409 // that the high bits are equal.
14410 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14411 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14412
14413 // We don't really care about what is known about the first bit (if
14414 // anything), so pretend that it is known zero for both to ensure they can
14415 // be compared as constants.
14416 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14417 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14418
14419 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14420 Op1Known.getConstant() != Op2Known.getConstant())
14421 return SDValue();
14422 }
14423 }
14424
14425 // We now know that the higher-order bits are irrelevant, we just need to
14426 // make sure that all of the intermediate operations are bit operations, and
14427 // all inputs are extensions.
14428 if (N->getOperand(0).getOpcode() != ISD::AND &&
14429 N->getOperand(0).getOpcode() != ISD::OR &&
14430 N->getOperand(0).getOpcode() != ISD::XOR &&
14431 N->getOperand(0).getOpcode() != ISD::SELECT &&
14432 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14433 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14434 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14435 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14436 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14437 return SDValue();
14438
14439 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14440 N->getOperand(1).getOpcode() != ISD::AND &&
14441 N->getOperand(1).getOpcode() != ISD::OR &&
14442 N->getOperand(1).getOpcode() != ISD::XOR &&
14443 N->getOperand(1).getOpcode() != ISD::SELECT &&
14444 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14445 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14446 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14447 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14448 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14449 return SDValue();
14450
14451   SmallVector<SDValue, 4> Inputs;
14452   SmallVector<SDValue, 8> BinOps, PromOps;
14453   SmallPtrSet<SDNode *, 16> Visited;
14454
14455 for (unsigned i = 0; i < 2; ++i) {
14456 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14457 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14458 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14459 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14460 isa<ConstantSDNode>(N->getOperand(i)))
14461 Inputs.push_back(N->getOperand(i));
14462 else
14463 BinOps.push_back(N->getOperand(i));
14464
14465 if (N->getOpcode() == ISD::TRUNCATE)
14466 break;
14467 }
14468
14469 // Visit all inputs, collect all binary operations (and, or, xor and
14470 // select) that are all fed by extensions.
14471 while (!BinOps.empty()) {
14472 SDValue BinOp = BinOps.pop_back_val();
14473
14474 if (!Visited.insert(BinOp.getNode()).second)
14475 continue;
14476
14477 PromOps.push_back(BinOp);
14478
14479 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14480 // The condition of the select is not promoted.
14481 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14482 continue;
14483 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14484 continue;
14485
14486 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14487 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14488 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14489 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14490 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14491 Inputs.push_back(BinOp.getOperand(i));
14492 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14493 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14494 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14495 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14496 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14497 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14498 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14499 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14500 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14501 BinOps.push_back(BinOp.getOperand(i));
14502 } else {
14503 // We have an input that is not an extension or another binary
14504 // operation; we'll abort this transformation.
14505 return SDValue();
14506 }
14507 }
14508 }
14509
14510 // Make sure that this is a self-contained cluster of operations (which
14511 // is not quite the same thing as saying that everything has only one
14512 // use).
14513 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14514 if (isa<ConstantSDNode>(Inputs[i]))
14515 continue;
14516
14517 for (const SDNode *User : Inputs[i].getNode()->users()) {
14518 if (User != N && !Visited.count(User))
14519 return SDValue();
14520
14521 // Make sure that we're not going to promote the non-output-value
14522 // operand(s) or SELECT or SELECT_CC.
14523 // FIXME: Although we could sometimes handle this, and it does occur in
14524 // practice that one of the condition inputs to the select is also one of
14525 // the outputs, we currently can't deal with this.
14526 if (User->getOpcode() == ISD::SELECT) {
14527 if (User->getOperand(0) == Inputs[i])
14528 return SDValue();
14529 } else if (User->getOpcode() == ISD::SELECT_CC) {
14530 if (User->getOperand(0) == Inputs[i] ||
14531 User->getOperand(1) == Inputs[i])
14532 return SDValue();
14533 }
14534 }
14535 }
14536
14537 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14538 for (const SDNode *User : PromOps[i].getNode()->users()) {
14539 if (User != N && !Visited.count(User))
14540 return SDValue();
14541
14542 // Make sure that we're not going to promote the non-output-value
14543 // operand(s) or SELECT or SELECT_CC.
14544 // FIXME: Although we could sometimes handle this, and it does occur in
14545 // practice that one of the condition inputs to the select is also one of
14546 // the outputs, we currently can't deal with this.
14547 if (User->getOpcode() == ISD::SELECT) {
14548 if (User->getOperand(0) == PromOps[i])
14549 return SDValue();
14550 } else if (User->getOpcode() == ISD::SELECT_CC) {
14551 if (User->getOperand(0) == PromOps[i] ||
14552 User->getOperand(1) == PromOps[i])
14553 return SDValue();
14554 }
14555 }
14556 }
14557
14558 // Replace all inputs with the extension operand.
14559 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14560 // Constants may have users outside the cluster of to-be-promoted nodes,
14561 // and so we need to replace those as we do the promotions.
14562 if (isa<ConstantSDNode>(Inputs[i]))
14563 continue;
14564 else
14565 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14566 }
14567
14568 std::list<HandleSDNode> PromOpHandles;
14569 for (auto &PromOp : PromOps)
14570 PromOpHandles.emplace_back(PromOp);
14571
14572 // Replace all operations (these are all the same, but have a different
14573 // (i1) return type). DAG.getNode will validate that the types of
14574 // a binary operator match, so go through the list in reverse so that
14575 // we've likely promoted both operands first. Any intermediate truncations or
14576 // extensions disappear.
14577 while (!PromOpHandles.empty()) {
14578 SDValue PromOp = PromOpHandles.back().getValue();
14579 PromOpHandles.pop_back();
14580
14581 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14582 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14583 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14584 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14585 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14586 PromOp.getOperand(0).getValueType() != MVT::i1) {
14587 // The operand is not yet ready (see comment below).
14588 PromOpHandles.emplace_front(PromOp);
14589 continue;
14590 }
14591
14592 SDValue RepValue = PromOp.getOperand(0);
14593 if (isa<ConstantSDNode>(RepValue))
14594 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14595
14596 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14597 continue;
14598 }
14599
14600 unsigned C;
14601 switch (PromOp.getOpcode()) {
14602 default: C = 0; break;
14603 case ISD::SELECT: C = 1; break;
14604 case ISD::SELECT_CC: C = 2; break;
14605 }
14606
14607 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14608 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14609 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14610 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14611 // The to-be-promoted operands of this node have not yet been
14612 // promoted (this should be rare because we're going through the
14613 // list backward, but if one of the operands has several users in
14614 // this cluster of to-be-promoted nodes, it is possible).
14615 PromOpHandles.emplace_front(PromOp);
14616 continue;
14617 }
14618
14619 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
14620
14621 // If there are any constant inputs, make sure they're replaced now.
14622 for (unsigned i = 0; i < 2; ++i)
14623 if (isa<ConstantSDNode>(Ops[C+i]))
14624 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14625
14626 DAG.ReplaceAllUsesOfValueWith(PromOp,
14627 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14628 }
14629
14630 // Now we're left with the initial truncation itself.
14631 if (N->getOpcode() == ISD::TRUNCATE)
14632 return N->getOperand(0);
14633
14634 // Otherwise, this is a comparison. The operands to be compared have just
14635 // changed type (to i1), but everything else is the same.
14636 return SDValue(N, 0);
14637}
14638
14639SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14640 DAGCombinerInfo &DCI) const {
14641 SelectionDAG &DAG = DCI.DAG;
14642 SDLoc dl(N);
14643
14644 // If we're tracking CR bits, we need to be careful that we don't have:
14645 // zext(binary-ops(trunc(x), trunc(y)))
14646 // or
14647   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
14648 // such that we're unnecessarily moving things into CR bits that can more
14649 // efficiently stay in GPRs. Note that if we're not certain that the high
14650 // bits are set as required by the final extension, we still may need to do
14651 // some masking to get the proper behavior.
14652
14653 // This same functionality is important on PPC64 when dealing with
14654 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14655 // the return values of functions. Because it is so similar, it is handled
14656 // here as well.
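  // For example, zext(and(trunc(a), trunc(b))) can become and(a, b), followed
  // by a mask (for zero extension) or a shl/sra pair (for sign extension)
  // only when the inputs' high bits are not already known to be correct.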
14657
14658 if (N->getValueType(0) != MVT::i32 &&
14659 N->getValueType(0) != MVT::i64)
14660 return SDValue();
14661
14662 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14663 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14664 return SDValue();
14665
14666 if (N->getOperand(0).getOpcode() != ISD::AND &&
14667 N->getOperand(0).getOpcode() != ISD::OR &&
14668 N->getOperand(0).getOpcode() != ISD::XOR &&
14669 N->getOperand(0).getOpcode() != ISD::SELECT &&
14670 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14671 return SDValue();
14672
14673   SmallVector<SDValue, 4> Inputs;
14674   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14675   SmallPtrSet<SDNode *, 16> Visited;
14676
14677 // Visit all inputs, collect all binary operations (and, or, xor and
14678 // select) that are all fed by truncations.
14679 while (!BinOps.empty()) {
14680 SDValue BinOp = BinOps.pop_back_val();
14681
14682 if (!Visited.insert(BinOp.getNode()).second)
14683 continue;
14684
14685 PromOps.push_back(BinOp);
14686
14687 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14688 // The condition of the select is not promoted.
14689 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14690 continue;
14691 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14692 continue;
14693
14694 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14695 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14696 Inputs.push_back(BinOp.getOperand(i));
14697 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14698 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14699 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14700 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14701 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14702 BinOps.push_back(BinOp.getOperand(i));
14703 } else {
14704 // We have an input that is not a truncation or another binary
14705 // operation; we'll abort this transformation.
14706 return SDValue();
14707 }
14708 }
14709 }
14710
14711   // The operands of a select that must be truncated when the select is
14712   // promoted, because those operands are themselves part of the to-be-promoted set.
14713 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14714
14715 // Make sure that this is a self-contained cluster of operations (which
14716 // is not quite the same thing as saying that everything has only one
14717 // use).
14718 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14719 if (isa<ConstantSDNode>(Inputs[i]))
14720 continue;
14721
14722 for (SDNode *User : Inputs[i].getNode()->users()) {
14723 if (User != N && !Visited.count(User))
14724 return SDValue();
14725
14726 // If we're going to promote the non-output-value operand(s) or SELECT or
14727 // SELECT_CC, record them for truncation.
14728 if (User->getOpcode() == ISD::SELECT) {
14729 if (User->getOperand(0) == Inputs[i])
14730 SelectTruncOp[0].insert(std::make_pair(User,
14731 User->getOperand(0).getValueType()));
14732 } else if (User->getOpcode() == ISD::SELECT_CC) {
14733 if (User->getOperand(0) == Inputs[i])
14734 SelectTruncOp[0].insert(std::make_pair(User,
14735 User->getOperand(0).getValueType()));
14736 if (User->getOperand(1) == Inputs[i])
14737 SelectTruncOp[1].insert(std::make_pair(User,
14738 User->getOperand(1).getValueType()));
14739 }
14740 }
14741 }
14742
14743 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14744 for (SDNode *User : PromOps[i].getNode()->users()) {
14745 if (User != N && !Visited.count(User))
14746 return SDValue();
14747
14748 // If we're going to promote the non-output-value operand(s) or SELECT or
14749 // SELECT_CC, record them for truncation.
14750 if (User->getOpcode() == ISD::SELECT) {
14751 if (User->getOperand(0) == PromOps[i])
14752 SelectTruncOp[0].insert(std::make_pair(User,
14753 User->getOperand(0).getValueType()));
14754 } else if (User->getOpcode() == ISD::SELECT_CC) {
14755 if (User->getOperand(0) == PromOps[i])
14756 SelectTruncOp[0].insert(std::make_pair(User,
14757 User->getOperand(0).getValueType()));
14758 if (User->getOperand(1) == PromOps[i])
14759 SelectTruncOp[1].insert(std::make_pair(User,
14760 User->getOperand(1).getValueType()));
14761 }
14762 }
14763 }
14764
14765 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14766 bool ReallyNeedsExt = false;
14767 if (N->getOpcode() != ISD::ANY_EXTEND) {
14768 // If all of the inputs are not already sign/zero extended, then
14769 // we'll still need to do that at the end.
14770 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14771 if (isa<ConstantSDNode>(Inputs[i]))
14772 continue;
14773
14774 unsigned OpBits =
14775 Inputs[i].getOperand(0).getValueSizeInBits();
14776 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14777
14778 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14779 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14780 APInt::getHighBitsSet(OpBits,
14781 OpBits-PromBits))) ||
14782 (N->getOpcode() == ISD::SIGN_EXTEND &&
14783 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14784 (OpBits-(PromBits-1)))) {
14785 ReallyNeedsExt = true;
14786 break;
14787 }
14788 }
14789 }
14790
14791 // Replace all inputs, either with the truncation operand, or a
14792 // truncation or extension to the final output type.
14793 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14794 // Constant inputs need to be replaced with the to-be-promoted nodes that
14795 // use them because they might have users outside of the cluster of
14796 // promoted nodes.
14797 if (isa<ConstantSDNode>(Inputs[i]))
14798 continue;
14799
14800 SDValue InSrc = Inputs[i].getOperand(0);
14801 if (Inputs[i].getValueType() == N->getValueType(0))
14802 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14803 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14804 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14805 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14806 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14807 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14808 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14809 else
14810 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14811 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14812 }
14813
14814 std::list<HandleSDNode> PromOpHandles;
14815 for (auto &PromOp : PromOps)
14816 PromOpHandles.emplace_back(PromOp);
14817
14818 // Replace all operations (these are all the same, but have a different
14819 // (promoted) return type). DAG.getNode will validate that the types of
14820 // a binary operator match, so go through the list in reverse so that
14821 // we've likely promoted both operands first.
14822 while (!PromOpHandles.empty()) {
14823 SDValue PromOp = PromOpHandles.back().getValue();
14824 PromOpHandles.pop_back();
14825
14826 unsigned C;
14827 switch (PromOp.getOpcode()) {
14828 default: C = 0; break;
14829 case ISD::SELECT: C = 1; break;
14830 case ISD::SELECT_CC: C = 2; break;
14831 }
14832
14833 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14834 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14835 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14836 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14837 // The to-be-promoted operands of this node have not yet been
14838 // promoted (this should be rare because we're going through the
14839 // list backward, but if one of the operands has several users in
14840 // this cluster of to-be-promoted nodes, it is possible).
14841 PromOpHandles.emplace_front(PromOp);
14842 continue;
14843 }
14844
14845 // For SELECT and SELECT_CC nodes, we do a similar check for any
14846 // to-be-promoted comparison inputs.
14847 if (PromOp.getOpcode() == ISD::SELECT ||
14848 PromOp.getOpcode() == ISD::SELECT_CC) {
14849 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14850 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14851 (SelectTruncOp[1].count(PromOp.getNode()) &&
14852 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14853 PromOpHandles.emplace_front(PromOp);
14854 continue;
14855 }
14856 }
14857
14858     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14859                                 PromOp.getNode()->op_end());
14860
14861 // If this node has constant inputs, then they'll need to be promoted here.
14862 for (unsigned i = 0; i < 2; ++i) {
14863 if (!isa<ConstantSDNode>(Ops[C+i]))
14864 continue;
14865 if (Ops[C+i].getValueType() == N->getValueType(0))
14866 continue;
14867
14868 if (N->getOpcode() == ISD::SIGN_EXTEND)
14869 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14870 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14871 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14872 else
14873 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14874 }
14875
14876 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14877 // truncate them again to the original value type.
14878 if (PromOp.getOpcode() == ISD::SELECT ||
14879 PromOp.getOpcode() == ISD::SELECT_CC) {
14880 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14881 if (SI0 != SelectTruncOp[0].end())
14882 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14883 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14884 if (SI1 != SelectTruncOp[1].end())
14885 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14886 }
14887
14888 DAG.ReplaceAllUsesOfValueWith(PromOp,
14889 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14890 }
14891
14892 // Now we're left with the initial extension itself.
14893 if (!ReallyNeedsExt)
14894 return N->getOperand(0);
14895
14896 // To zero extend, just mask off everything except for the first bit (in the
14897 // i1 case).
14898 if (N->getOpcode() == ISD::ZERO_EXTEND)
14899 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14900                        DAG.getConstant(APInt::getLowBitsSet(
14901                                          N->getValueSizeInBits(0), PromBits),
14902 dl, N->getValueType(0)));
14903
14904 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14905 "Invalid extension type");
14906 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14907 SDValue ShiftCst =
14908 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14909 return DAG.getNode(
14910 ISD::SRA, dl, N->getValueType(0),
14911 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14912 ShiftCst);
14913}
14914
14915SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14916 DAGCombinerInfo &DCI) const {
14917 assert(N->getOpcode() == ISD::SETCC &&
14918 "Should be called with a SETCC node");
14919
14920 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14921 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14922 SDValue LHS = N->getOperand(0);
14923 SDValue RHS = N->getOperand(1);
14924
14925 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14926 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14927 LHS.hasOneUse())
14928 std::swap(LHS, RHS);
14929
14930 // x == 0-y --> x+y == 0
14931 // x != 0-y --> x+y != 0
14932 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14933 RHS.hasOneUse()) {
14934 SDLoc DL(N);
14935 SelectionDAG &DAG = DCI.DAG;
14936 EVT VT = N->getValueType(0);
14937 EVT OpVT = LHS.getValueType();
14938 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14939 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14940 }
14941 }
14942
14943 return DAGCombineTruncBoolExt(N, DCI);
14944}
14945
14946// Is this an extending load from an f32 to an f64?
14947static bool isFPExtLoad(SDValue Op) {
14948 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14949 return LD->getExtensionType() == ISD::EXTLOAD &&
14950 Op.getValueType() == MVT::f64;
14951 return false;
14952}
14953
14954 /// Reduces the number of fp-to-int conversions when building a vector.
14955///
14956/// If this vector is built out of floating to integer conversions,
14957/// transform it to a vector built out of floating point values followed by a
14958/// single floating to integer conversion of the vector.
14959/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14960/// becomes (fptosi (build_vector ($A, $B, ...)))
14961SDValue PPCTargetLowering::
14962combineElementTruncationToVectorTruncation(SDNode *N,
14963 DAGCombinerInfo &DCI) const {
14964 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14965 "Should be called with a BUILD_VECTOR node");
14966
14967 SelectionDAG &DAG = DCI.DAG;
14968 SDLoc dl(N);
14969
14970 SDValue FirstInput = N->getOperand(0);
14971 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14972 "The input operand must be an fp-to-int conversion.");
14973
14974 // This combine happens after legalization so the fp_to_[su]i nodes are
14975   // already converted to PPCISD nodes.
14976 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14977 if (FirstConversion == PPCISD::FCTIDZ ||
14978 FirstConversion == PPCISD::FCTIDUZ ||
14979 FirstConversion == PPCISD::FCTIWZ ||
14980 FirstConversion == PPCISD::FCTIWUZ) {
14981 bool IsSplat = true;
14982 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14983 FirstConversion == PPCISD::FCTIWUZ;
14984 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14985     SmallVector<SDValue, 4> Ops;
14986     EVT TargetVT = N->getValueType(0);
14987 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14988 SDValue NextOp = N->getOperand(i);
14989 if (NextOp.getOpcode() != PPCISD::MFVSR)
14990 return SDValue();
14991 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14992 if (NextConversion != FirstConversion)
14993 return SDValue();
14994 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14995 // This is not valid if the input was originally double precision. It is
14996 // also not profitable to do unless this is an extending load in which
14997 // case doing this combine will allow us to combine consecutive loads.
14998 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14999 return SDValue();
15000 if (N->getOperand(i) != FirstInput)
15001 IsSplat = false;
15002 }
15003
15004 // If this is a splat, we leave it as-is since there will be only a single
15005 // fp-to-int conversion followed by a splat of the integer. This is better
15006 // for 32-bit and smaller ints and neutral for 64-bit ints.
15007 if (IsSplat)
15008 return SDValue();
15009
15010 // Now that we know we have the right type of node, get its operands
15011 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15012 SDValue In = N->getOperand(i).getOperand(0);
15013 if (Is32Bit) {
15014 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15015 // here, we know that all inputs are extending loads so this is safe).
15016 if (In.isUndef())
15017 Ops.push_back(DAG.getUNDEF(SrcVT));
15018 else {
15019 SDValue Trunc =
15020 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15021 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15022 Ops.push_back(Trunc);
15023 }
15024 } else
15025 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15026 }
15027
15028 unsigned Opcode;
15029 if (FirstConversion == PPCISD::FCTIDZ ||
15030 FirstConversion == PPCISD::FCTIWZ)
15031 Opcode = ISD::FP_TO_SINT;
15032 else
15033 Opcode = ISD::FP_TO_UINT;
15034
15035 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15036 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15037 return DAG.getNode(Opcode, dl, TargetVT, BV);
15038 }
15039 return SDValue();
15040}
15041
15042/// Reduce the number of loads when building a vector.
15043///
15044/// Building a vector out of multiple loads can be converted to a load
15045/// of the vector type if the loads are consecutive. If the loads are
15046/// consecutive but in descending order, a shuffle is added at the end
15047/// to reorder the vector.
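/// For example, (build_vector (load a[0]), (load a[1]), (load a[2]),
/// (load a[3])) of f32 elements becomes a single v4f32 load of a[0..3];
/// if the elements were loaded in descending address order, the wide load
/// is followed by a vector_shuffle that reverses the element order.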
15048 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15049   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15050 "Should be called with a BUILD_VECTOR node");
15051
15052 SDLoc dl(N);
15053
15054   // Return early for non byte-sized types, as they can't be consecutive.
15055 if (!N->getValueType(0).getVectorElementType().isByteSized())
15056 return SDValue();
15057
15058 bool InputsAreConsecutiveLoads = true;
15059 bool InputsAreReverseConsecutive = true;
15060 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15061 SDValue FirstInput = N->getOperand(0);
15062 bool IsRoundOfExtLoad = false;
15063 LoadSDNode *FirstLoad = nullptr;
15064
15065 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15066 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15067 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15068 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15069 }
15070 // Not a build vector of (possibly fp_rounded) loads.
15071 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15072 N->getNumOperands() == 1)
15073 return SDValue();
15074
15075 if (!IsRoundOfExtLoad)
15076 FirstLoad = cast<LoadSDNode>(FirstInput);
15077
15078   SmallVector<LoadSDNode *, 4> InputLoads;
15079   InputLoads.push_back(FirstLoad);
15080 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15081 // If any inputs are fp_round(extload), they all must be.
15082 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15083 return SDValue();
15084
15085 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15086 N->getOperand(i);
15087 if (NextInput.getOpcode() != ISD::LOAD)
15088 return SDValue();
15089
15090 SDValue PreviousInput =
15091 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15092 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15093 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15094
15095 // If any inputs are fp_round(extload), they all must be.
15096 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15097 return SDValue();
15098
15099 // We only care about regular loads. The PPC-specific load intrinsics
15100 // will not lead to a merge opportunity.
15101 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15102 InputsAreConsecutiveLoads = false;
15103 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15104 InputsAreReverseConsecutive = false;
15105
15106 // Exit early if the loads are neither consecutive nor reverse consecutive.
15107 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15108 return SDValue();
15109 InputLoads.push_back(LD2);
15110 }
15111
15112 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15113 "The loads cannot be both consecutive and reverse consecutive.");
15114
15115 SDValue WideLoad;
15116 SDValue ReturnSDVal;
15117 if (InputsAreConsecutiveLoads) {
15118 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15119 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15120 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15121 FirstLoad->getAlign());
15122 ReturnSDVal = WideLoad;
15123 } else if (InputsAreReverseConsecutive) {
15124 LoadSDNode *LastLoad = InputLoads.back();
15125 assert(LastLoad && "Input needs to be a LoadSDNode.");
15126 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15127 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15128 LastLoad->getAlign());
15129     SmallVector<int, 16> Ops;
15130     for (int i = N->getNumOperands() - 1; i >= 0; i--)
15131 Ops.push_back(i);
15132
15133 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
15134 DAG.getUNDEF(N->getValueType(0)), Ops);
15135 } else
15136 return SDValue();
15137
15138 for (auto *LD : InputLoads)
15139 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
15140 return ReturnSDVal;
15141}
15142
15143// This function adds the required vector_shuffle needed to get
15144// the elements of the vector extract in the correct position
15145// as specified by the CorrectElems encoding.
15146 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15147                                       SDValue Input, uint64_t Elems,
15148 uint64_t CorrectElems) {
15149 SDLoc dl(N);
15150
15151 unsigned NumElems = Input.getValueType().getVectorNumElements();
15152 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15153
15154 // Knowing the element indices being extracted from the original
15155 // vector and the order in which they're being inserted, just put
15156 // them at element indices required for the instruction.
15157 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15158 if (DAG.getDataLayout().isLittleEndian())
15159 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15160 else
15161 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15162 CorrectElems = CorrectElems >> 8;
15163 Elems = Elems >> 8;
15164 }
15165
15166 SDValue Shuffle =
15167 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
15168 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
15169
15170 EVT VT = N->getValueType(0);
15171 SDValue Conv = DAG.getBitcast(VT, Shuffle);
15172
15173 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
15174                                Input.getValueType().getVectorElementType(),
15175                                N->getNumOperands());
15176   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
15177 DAG.getValueType(ExtVT));
15178}
15179
15180// Look for build vector patterns where input operands come from sign
15181// extended vector_extract elements of specific indices. If the correct indices
15182// aren't used, add a vector shuffle to fix up the indices and create
15183// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
15184// during instruction selection.
15185 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15186   // This array encodes the indices that the vector sign extend instructions
15187 // extract from when extending from one type to another for both BE and LE.
15188   // The right nibble of each byte corresponds to the LE indices,
15189   // and the left nibble of each byte corresponds to the BE indices.
15190 // For example: 0x3074B8FC byte->word
15191 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15192 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15193 // For example: 0x000070F8 byte->double word
15194 // For LE: the allowed indices are: 0x0,0x8
15195 // For BE: the allowed indices are: 0x7,0xF
15196 uint64_t TargetElems[] = {
15197 0x3074B8FC, // b->w
15198 0x000070F8, // b->d
15199 0x10325476, // h->w
15200 0x00003074, // h->d
15201 0x00001032, // w->d
15202 };
15203
15204 uint64_t Elems = 0;
15205 int Index;
15206 SDValue Input;
15207
15208 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15209 if (!Op)
15210 return false;
15211 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15212 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15213 return false;
15214
15215 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15216 // of the right width.
15217 SDValue Extract = Op.getOperand(0);
15218 if (Extract.getOpcode() == ISD::ANY_EXTEND)
15219 Extract = Extract.getOperand(0);
15220 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15221 return false;
15222
15223 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
15224 if (!ExtOp)
15225 return false;
15226
15227 Index = ExtOp->getZExtValue();
15228 if (Input && Input != Extract.getOperand(0))
15229 return false;
15230
15231 if (!Input)
15232 Input = Extract.getOperand(0);
15233
15234 Elems = Elems << 8;
15235 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15236 Elems |= Index;
15237
15238 return true;
15239 };
15240
15241   // If the build vector operands aren't sign extended vector extracts
15242   // of the same input vector, then return.
15243 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15244 if (!isSExtOfVecExtract(N->getOperand(i))) {
15245 return SDValue();
15246 }
15247 }
15248
15249 // If the vector extract indices are not correct, add the appropriate
15250 // vector_shuffle.
15251 int TgtElemArrayIdx;
15252 int InputSize = Input.getValueType().getScalarSizeInBits();
15253 int OutputSize = N->getValueType(0).getScalarSizeInBits();
15254 if (InputSize + OutputSize == 40)
15255 TgtElemArrayIdx = 0;
15256 else if (InputSize + OutputSize == 72)
15257 TgtElemArrayIdx = 1;
15258 else if (InputSize + OutputSize == 48)
15259 TgtElemArrayIdx = 2;
15260 else if (InputSize + OutputSize == 80)
15261 TgtElemArrayIdx = 3;
15262 else if (InputSize + OutputSize == 96)
15263 TgtElemArrayIdx = 4;
15264 else
15265 return SDValue();
15266
15267 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15268 CorrectElems = DAG.getDataLayout().isLittleEndian()
15269 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15270 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15271 if (Elems != CorrectElems) {
15272 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15273 }
15274
15275 // Regular lowering will catch cases where a shuffle is not needed.
15276 return SDValue();
15277}
15278
15279// Look for the pattern of a load from a narrow width to i128, feeding
15280// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15281// (LXVRZX). This node represents a zero extending load that will be matched
15282// to the Load VSX Vector Rightmost instructions.
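// For example, (v1i128 build_vector (i128 zextload i64, <ptr>)) becomes a
// PPCISD::LXVRZX memory node whose operands are the chain, the pointer, and
// the load width in bits (64 here), as assembled in LoadOps below.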
15283 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15284   SDLoc DL(N);
15285
15286 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15287 if (N->getValueType(0) != MVT::v1i128)
15288 return SDValue();
15289
15290 SDValue Operand = N->getOperand(0);
15291 // Proceed with the transformation if the operand to the BUILD_VECTOR
15292 // is a load instruction.
15293 if (Operand.getOpcode() != ISD::LOAD)
15294 return SDValue();
15295
15296 auto *LD = cast<LoadSDNode>(Operand);
15297 EVT MemoryType = LD->getMemoryVT();
15298
15299   // This transformation is only valid if we are loading either a byte,
15300 // halfword, word, or doubleword.
15301 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15302 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15303
15304 // Ensure that the load from the narrow width is being zero extended to i128.
15305 if (!ValidLDType ||
15306 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15307 LD->getExtensionType() != ISD::EXTLOAD))
15308 return SDValue();
15309
15310 SDValue LoadOps[] = {
15311 LD->getChain(), LD->getBasePtr(),
15312 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15313
15314   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15315                                  DAG.getVTList(MVT::v1i128, MVT::Other),
15316 LoadOps, MemoryType, LD->getMemOperand());
15317}
15318
15319SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15320 DAGCombinerInfo &DCI) const {
15321 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15322 "Should be called with a BUILD_VECTOR node");
15323
15324 SelectionDAG &DAG = DCI.DAG;
15325 SDLoc dl(N);
15326
15327 if (!Subtarget.hasVSX())
15328 return SDValue();
15329
15330 // The target independent DAG combiner will leave a build_vector of
15331 // float-to-int conversions intact. We can generate MUCH better code for
15332 // a float-to-int conversion of a vector of floats.
15333 SDValue FirstInput = N->getOperand(0);
15334 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15335 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15336 if (Reduced)
15337 return Reduced;
15338 }
15339
15340 // If we're building a vector out of consecutive loads, just load that
15341 // vector type.
15342 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15343 if (Reduced)
15344 return Reduced;
15345
15346 // If we're building a vector out of extended elements from another vector
15347 // we have P9 vector integer extend instructions. The code assumes legal
15348 // input types (i.e. it can't handle things like v4i16) so do not run before
15349 // legalization.
15350 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15351 Reduced = combineBVOfVecSExt(N, DAG);
15352 if (Reduced)
15353 return Reduced;
15354 }
15355
15356 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15357 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15358 // is a load from <valid narrow width> to i128.
15359 if (Subtarget.isISA3_1()) {
15360 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15361 if (BVOfZLoad)
15362 return BVOfZLoad;
15363 }
15364
15365 if (N->getValueType(0) != MVT::v2f64)
15366 return SDValue();
15367
15368 // Looking for:
15369 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15370 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15371 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15372 return SDValue();
15373 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15374 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15375 return SDValue();
15376 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15377 return SDValue();
15378
15379 SDValue Ext1 = FirstInput.getOperand(0);
15380 SDValue Ext2 = N->getOperand(1).getOperand(0);
15381 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15382      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15383     return SDValue();
15384
15385 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15386 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15387 if (!Ext1Op || !Ext2Op)
15388 return SDValue();
15389 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15390 Ext1.getOperand(0) != Ext2.getOperand(0))
15391 return SDValue();
15392
15393 int FirstElem = Ext1Op->getZExtValue();
15394 int SecondElem = Ext2Op->getZExtValue();
15395 int SubvecIdx;
15396 if (FirstElem == 0 && SecondElem == 1)
15397 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15398 else if (FirstElem == 2 && SecondElem == 3)
15399 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15400 else
15401 return SDValue();
15402
15403 SDValue SrcVec = Ext1.getOperand(0);
15404 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15405     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15406   return DAG.getNode(NodeType, dl, MVT::v2f64,
15407 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15408}
15409
15410SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15411 DAGCombinerInfo &DCI) const {
15412 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15413 N->getOpcode() == ISD::UINT_TO_FP) &&
15414 "Need an int -> FP conversion node here");
15415
15416 if (useSoftFloat() || !Subtarget.has64BitSupport())
15417 return SDValue();
15418
15419 SelectionDAG &DAG = DCI.DAG;
15420 SDLoc dl(N);
15421 SDValue Op(N, 0);
15422
15423 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15424 // from the hardware.
15425 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15426 return SDValue();
15427 if (!Op.getOperand(0).getValueType().isSimple())
15428 return SDValue();
15429 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15430 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15431 return SDValue();
15432
15433 SDValue FirstOperand(Op.getOperand(0));
15434 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15435 (FirstOperand.getValueType() == MVT::i8 ||
15436 FirstOperand.getValueType() == MVT::i16);
15437 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15438 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15439 bool DstDouble = Op.getValueType() == MVT::f64;
15440 unsigned ConvOp = Signed ?
15441 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15442 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15443 SDValue WidthConst =
15444 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15445 dl, false);
15446 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15447 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15448     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15449                                          DAG.getVTList(MVT::f64, MVT::Other),
15450 Ops, MVT::i8, LDN->getMemOperand());
15451 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15452
15453 // For signed conversion, we need to sign-extend the value in the VSR
15454 if (Signed) {
15455 SDValue ExtOps[] = { Ld, WidthConst };
15456 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15457 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15458 } else
15459 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15460 }
15461
15462
15463 // For i32 intermediate values, unfortunately, the conversion functions
15464   // leave the upper 32 bits of the value undefined. Within the set of
15465 // scalar instructions, we have no method for zero- or sign-extending the
15466 // value. Thus, we cannot handle i32 intermediate values here.
15467 if (Op.getOperand(0).getValueType() == MVT::i32)
15468 return SDValue();
15469
15470 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15471 "UINT_TO_FP is supported only with FPCVT");
15472
15473 // If we have FCFIDS, then use it when converting to single-precision.
15474 // Otherwise, convert to double-precision and then round.
15475 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15476 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15477                                                             : PPCISD::FCFIDS)
15478                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15479 : PPCISD::FCFID);
15480 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15481 ? MVT::f32
15482 : MVT::f64;
15483
15484   // If we're converting from a float to an int and back to a float again,
15485 // then we don't need the store/load pair at all.
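  // For example, (f64 sint_to_fp (i64 fp_to_sint f64:%x)) becomes
  // (FCFID (FCTIDZ %x)), keeping the value in floating-point registers
  // rather than round-tripping it through memory.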
15486 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15487 Subtarget.hasFPCVT()) ||
15488 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15489 SDValue Src = Op.getOperand(0).getOperand(0);
15490 if (Src.getValueType() == MVT::f32) {
15491 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15492 DCI.AddToWorklist(Src.getNode());
15493 } else if (Src.getValueType() != MVT::f64) {
15494 // Make sure that we don't pick up a ppc_fp128 source value.
15495 return SDValue();
15496 }
15497
15498 unsigned FCTOp =
15499 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15500                                                         PPCISD::FCTIDUZ;
15501
15502 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15503 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15504
15505 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15506 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15507 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15508 DCI.AddToWorklist(FP.getNode());
15509 }
15510
15511 return FP;
15512 }
15513
15514 return SDValue();
15515}
15516
15517// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15518// builtins) into loads with swaps.
15519SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15520 DAGCombinerInfo &DCI) const {
15521 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15522 // load combines.
15523 if (DCI.isBeforeLegalizeOps())
15524 return SDValue();
15525
15526 SelectionDAG &DAG = DCI.DAG;
15527 SDLoc dl(N);
15528 SDValue Chain;
15529 SDValue Base;
15530 MachineMemOperand *MMO;
15531
15532 switch (N->getOpcode()) {
15533 default:
15534 llvm_unreachable("Unexpected opcode for little endian VSX load");
15535 case ISD::LOAD: {
15536 LoadSDNode *LD = cast<LoadSDNode>(N);
15537 Chain = LD->getChain();
15538 Base = LD->getBasePtr();
15539 MMO = LD->getMemOperand();
15540 // If the MMO suggests this isn't a load of a full vector, leave
15541 // things alone. For a built-in, we have to make the change for
15542 // correctness, so if there is a size problem that will be a bug.
15543 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15544 return SDValue();
15545 break;
15546 }
15547 case ISD::INTRINSIC_W_CHAIN: {
15548 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15549 Chain = Intrin->getChain();
15550 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15551 // us what we want. Get operand 2 instead.
15552 Base = Intrin->getOperand(2);
15553 MMO = Intrin->getMemOperand();
15554 break;
15555 }
15556 }
15557
15558 MVT VecTy = N->getValueType(0).getSimpleVT();
15559
15560 SDValue LoadOps[] = { Chain, Base };
15561 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15562 DAG.getVTList(MVT::v2f64, MVT::Other),
15563 LoadOps, MVT::v2f64, MMO);
15564
15565 DCI.AddToWorklist(Load.getNode());
15566 Chain = Load.getValue(1);
15567 SDValue Swap = DAG.getNode(
15568 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15569 DCI.AddToWorklist(Swap.getNode());
15570
15571 // Add a bitcast if the resulting load type doesn't match v2f64.
15572 if (VecTy != MVT::v2f64) {
15573 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15574 DCI.AddToWorklist(N.getNode());
15575 // Package {bitcast value, swap's chain} to match Load's shape.
15576 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15577 N, Swap.getValue(1));
15578 }
15579
15580 return Swap;
15581}
15582
15583// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15584// builtins) into stores with swaps.
15585SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15586 DAGCombinerInfo &DCI) const {
15587 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15588 // store combines.
15589 if (DCI.isBeforeLegalizeOps())
15590 return SDValue();
15591
15592 SelectionDAG &DAG = DCI.DAG;
15593 SDLoc dl(N);
15594 SDValue Chain;
15595 SDValue Base;
15596 unsigned SrcOpnd;
15597 MachineMemOperand *MMO;
15598
15599 switch (N->getOpcode()) {
15600 default:
15601 llvm_unreachable("Unexpected opcode for little endian VSX store");
15602 case ISD::STORE: {
15603 StoreSDNode *ST = cast<StoreSDNode>(N);
15604 Chain = ST->getChain();
15605 Base = ST->getBasePtr();
15606 MMO = ST->getMemOperand();
15607 SrcOpnd = 1;
15608 // If the MMO suggests this isn't a store of a full vector, leave
15609 // things alone. For a built-in, we have to make the change for
15610 // correctness, so if there is a size problem that will be a bug.
15611 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15612 return SDValue();
15613 break;
15614 }
15615 case ISD::INTRINSIC_VOID: {
15616 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15617 Chain = Intrin->getChain();
15618 // Intrin->getBasePtr() oddly does not get what we want.
15619 Base = Intrin->getOperand(3);
15620 MMO = Intrin->getMemOperand();
15621 SrcOpnd = 2;
15622 break;
15623 }
15624 }
15625
15626 SDValue Src = N->getOperand(SrcOpnd);
15627 MVT VecTy = Src.getValueType().getSimpleVT();
15628
15629 // All stores are done as v2f64 and possible bit cast.
15630 if (VecTy != MVT::v2f64) {
15631 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15632 DCI.AddToWorklist(Src.getNode());
15633 }
15634
15635 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15636 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15637 DCI.AddToWorklist(Swap.getNode());
15638 Chain = Swap.getValue(1);
15639 SDValue StoreOps[] = { Chain, Swap, Base };
15640 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15641 DAG.getVTList(MVT::Other),
15642 StoreOps, VecTy, MMO);
15643 DCI.AddToWorklist(Store.getNode());
15644 return Store;
15645}
15646
15647// Handle DAG combine for STORE (FP_TO_INT F).
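// For example, on a VSX subtarget with FPCVT, a pattern such as
//   store (fp_to_sint f64 %f to i32), ptr %p
// can typically keep the converted value in a VSR and store it directly
// (roughly xscvdpsxws followed by stfiwx/stxsiwx) rather than moving it
// through a GPR; the PPCISD::ST_VSR_SCAL_INT node built below models this
// combined convert-and-store.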
15648SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15649 DAGCombinerInfo &DCI) const {
15650 SelectionDAG &DAG = DCI.DAG;
15651 SDLoc dl(N);
15652 unsigned Opcode = N->getOperand(1).getOpcode();
15653 (void)Opcode;
15654 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15655
15656 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15657 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15658 && "Not a FP_TO_INT Instruction!");
15659
15660 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15661 EVT Op1VT = N->getOperand(1).getValueType();
15662 EVT ResVT = Val.getValueType();
15663
15664 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15665 return SDValue();
15666
15667 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15668 bool ValidTypeForStoreFltAsInt =
15669 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15670 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15671
15672 // TODO: Lower conversion from f128 on all VSX targets
15673 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15674 return SDValue();
15675
15676 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15677 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15678 return SDValue();
15679
15680 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15681
15682 // Set number of bytes being converted.
15683 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15684 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15685 DAG.getIntPtrConstant(ByteSize, dl, false),
15686 DAG.getValueType(Op1VT)};
15687
15688 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15689 DAG.getVTList(MVT::Other), Ops,
15690 cast<StoreSDNode>(N)->getMemoryVT(),
15691 cast<StoreSDNode>(N)->getMemOperand());
15692
15693 return Val;
15694}
15695
15696static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15697 // Check that the source of the element keeps flipping
15698 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
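// For example, with NumElts == 4 the mask <0, 5, 2, 7> alternates between
// the two source vectors, whereas <0, 1, 6, 7> does not.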
15699 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15700 for (int i = 1, e = Mask.size(); i < e; i++) {
15701 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15702 return false;
15703 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15704 return false;
15705 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15706 }
15707 return true;
15708}
15709
15710static bool isSplatBV(SDValue Op) {
15711 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15712 return false;
15713 SDValue FirstOp;
15714
15715 // Find first non-undef input.
15716 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15717 FirstOp = Op.getOperand(i);
15718 if (!FirstOp.isUndef())
15719 break;
15720 }
15721
15722 // All inputs are undef or the same as the first non-undef input.
15723 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15724 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15725 return false;
15726 return true;
15727}
15728
15729static SDValue isScalarToVec(SDValue Op) {
15730 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15731 return Op;
15732 if (Op.getOpcode() != ISD::BITCAST)
15733 return SDValue();
15734 Op = Op.getOperand(0);
15735 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15736 return Op;
15737 return SDValue();
15738}
15739
15740// Fix up the shuffle mask to account for the fact that the result of
15741// scalar_to_vector is not in lane zero. This just takes all values in
15742// the ranges specified by the min/max indices and adds the number of
15743// elements required to ensure each element comes from the respective
15744// position in the valid lane.
15745// On little endian, that's just the corresponding element in the other
15746// half of the vector. On big endian, it is in the same half but right
15747// justified rather than left justified in that half.
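// For example, for a v4i32 shuffle (HalfVec == 2) whose LHS is a
// scalar_to_vector of an i32 value, a mask entry of 0 becomes 2 on little
// endian (the corresponding element in the other half) and 1 on big endian
// (right justified within the left half).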
15748static void fixupShuffleMaskForPermutedSToV(
15749 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15750 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15751 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15752 int LHSEltFixup =
15753 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15754 int RHSEltFixup =
15755 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15756 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15757 int Idx = ShuffV[I];
15758 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15759 ShuffV[I] += LHSEltFixup;
15760 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15761 ShuffV[I] += RHSEltFixup;
15762 }
15763}
15764
15765// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15766// the original is:
15767// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15768// In such a case, just change the shuffle mask to extract the element
15769// from the permuted index.
15770static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15771 const PPCSubtarget &Subtarget) {
15772 SDLoc dl(OrigSToV);
15773 EVT VT = OrigSToV.getValueType();
15774 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15775 "Expecting a SCALAR_TO_VECTOR here");
15776 SDValue Input = OrigSToV.getOperand(0);
15777
15778 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15779 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15780 SDValue OrigVector = Input.getOperand(0);
15781
15782 // Can't handle non-const element indices or different vector types
15783 // for the input to the extract and the output of the scalar_to_vector.
15784 if (Idx && VT == OrigVector.getValueType()) {
15785 unsigned NumElts = VT.getVectorNumElements();
15786 assert(
15787 NumElts > 1 &&
15788 "Cannot produce a permuted scalar_to_vector for one element vector");
15789 SmallVector<int, 16> NewMask(NumElts, -1);
15790 unsigned ResultInElt = NumElts / 2;
15791 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15792 NewMask[ResultInElt] = Idx->getZExtValue();
15793 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15794 }
15795 }
15796 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15797 OrigSToV.getOperand(0));
15798}
15799
15800static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15801 int HalfVec, int LHSLastElementDefined,
15802 int RHSLastElementDefined) {
15803 for (int Index : ShuffV) {
15804 if (Index < 0) // Skip explicitly undefined mask indices.
15805 continue;
15806 // Handle first input vector of the vector_shuffle.
15807 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15808 (Index > LHSLastElementDefined))
15809 return false;
15810 // Handle second input vector of the vector_shuffle.
15811 if ((RHSLastElementDefined >= 0) &&
15812 (Index > HalfVec + RHSLastElementDefined))
15813 return false;
15814 }
15815 return true;
15816}
15817
15818static SDValue generateSToVPermutedForVecShuffle(
15819 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15820 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15821 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15822 EVT VecShuffOperandType = VecShuffOperand.getValueType();
15823 // Set up the values for the shuffle vector fixup.
15824 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15825 // The last element depends on if the input comes from the LHS or RHS.
15826 //
15827 // For example:
15828 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15829 //
15830 // For the LHS: The last element that comes from the LHS is actually 0, not 3
15831 // because elements 1 and higher of a scalar_to_vector are undefined.
15832 // For the RHS: The last element that comes from the RHS is actually 5, not 7
15833 // because elements 1 and higher of a scalar_to_vector are undefined.
15834 // It is also not 4 because the original scalar_to_vector is wider and
15835 // actually contains two i32 elements.
15836 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
15837 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
15838 : FirstElt;
15839 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15840 if (SToVPermuted.getValueType() != VecShuffOperandType)
15841 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15842 return SToVPermuted;
15843}
15844
15845// On little endian subtargets, combine shuffles such as:
15846// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15847// into:
15848// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15849// because the latter can be matched to a single instruction merge.
15850// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15851// to put the value into element zero. Adjust the shuffle mask so that the
15852// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15853// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15854// nodes with elements smaller than doubleword because all the ways
15855// of getting scalar data into a vector register put the value in the
15856// rightmost element of the left half of the vector.
15857SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15858 SelectionDAG &DAG) const {
15859 SDValue LHS = SVN->getOperand(0);
15860 SDValue RHS = SVN->getOperand(1);
15861 auto Mask = SVN->getMask();
15862 int NumElts = LHS.getValueType().getVectorNumElements();
15863 SDValue Res(SVN, 0);
15864 SDLoc dl(SVN);
15865 bool IsLittleEndian = Subtarget.isLittleEndian();
15866
15867 // On big endian targets this is only useful for subtargets with direct moves.
15868 // On little endian targets it would be useful for all subtargets with VSX.
15869 // However adding special handling for LE subtargets without direct moves
15870 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15871 // which includes direct moves.
15872 if (!Subtarget.hasDirectMove())
15873 return Res;
15874
15875 // If this is not a shuffle of a shuffle and the first element comes from
15876 // the second vector, canonicalize to the commuted form. This will make it
15877 // more likely to match one of the single instruction patterns.
15878 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15879 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15880 std::swap(LHS, RHS);
15881 Res = DAG.getCommutedVectorShuffle(*SVN);
15882 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15883 }
15884
15885 // Adjust the shuffle mask if either input vector comes from a
15886 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15887 // form (to prevent the need for a swap).
15888 SmallVector<int, 16> ShuffV(Mask);
15889 SDValue SToVLHS = isScalarToVec(LHS);
15890 SDValue SToVRHS = isScalarToVec(RHS);
15891 if (SToVLHS || SToVRHS) {
15892 EVT VT = SVN->getValueType(0);
15893 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15894 int ShuffleNumElts = ShuffV.size();
15895 int HalfVec = ShuffleNumElts / 2;
15896 // The width of the "valid lane" (i.e. the lane that contains the value that
15897 // is vectorized) needs to be expressed in terms of the number of elements
15898 // of the shuffle. It is thereby the ratio of the values before and after
15899 // any bitcast, which will be set later on if the LHS or RHS are
15900 // SCALAR_TO_VECTOR nodes.
15901 unsigned LHSNumValidElts = HalfVec;
15902 unsigned RHSNumValidElts = HalfVec;
15903
15904 // Initially assume that neither input is permuted. These will be adjusted
15905 // accordingly if either input is. Note that -1 means that all elements
15906 // are undefined.
15907 int LHSFirstElt = 0;
15908 int RHSFirstElt = ShuffleNumElts;
15909 int LHSLastElt = -1;
15910 int RHSLastElt = -1;
15911
15912 // Get the permuted scalar to vector nodes for the source(s) that come from
15913 // ISD::SCALAR_TO_VECTOR.
15914 // On big endian systems, this only makes sense for element sizes smaller
15915 // than 64 bits since for 64-bit elements, all instructions already put
15916 // the value into element zero. Since scalar size of LHS and RHS may differ
15917 // after isScalarToVec, this should be checked using their own sizes.
15918 int LHSScalarSize = 0;
15919 int RHSScalarSize = 0;
15920 if (SToVLHS) {
15921 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15922 if (!IsLittleEndian && LHSScalarSize >= 64)
15923 return Res;
15924 }
15925 if (SToVRHS) {
15926 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15927 if (!IsLittleEndian && RHSScalarSize >= 64)
15928 return Res;
15929 }
15930 if (LHSScalarSize != 0)
15931 LHS = generateSToVPermutedForVecShuffle(
15932 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15933 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15934 if (RHSScalarSize != 0)
15935 RHS = generateSToVPermutedForVecShuffle(
15936 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15937 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15938
15939 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15940 return Res;
15941
15942 // Fix up the shuffle mask to reflect where the desired element actually is.
15943 // The minimum and maximum indices that correspond to element zero for both
15944 // the LHS and RHS are computed and will control which shuffle mask entries
15945 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15946 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15947 fixupShuffleMaskForPermutedSToV(
15948 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15949 LHSNumValidElts, RHSNumValidElts, Subtarget);
15950 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15951
15952 // We may have simplified away the shuffle. We won't be able to do anything
15953 // further with it here.
15954 if (!isa<ShuffleVectorSDNode>(Res))
15955 return Res;
15956 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15957 }
15958
15959 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15960 // The common case after we commuted the shuffle is that the RHS is a splat
15961 // and we have elements coming in from the splat at indices that are not
15962 // conducive to using a merge.
15963 // Example:
15964 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15965 if (!isSplatBV(TheSplat))
15966 return Res;
15967
15968 // We are looking for a mask such that all even elements are from
15969 // one vector and all odd elements from the other.
15970 if (!isAlternatingShuffMask(Mask, NumElts))
15971 return Res;
15972
15973 // Adjust the mask so we are pulling in the same index from the splat
15974 // as the index from the interesting vector in consecutive elements.
15975 if (IsLittleEndian) {
15976 // Example (even elements from first vector):
15977 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15978 if (Mask[0] < NumElts)
15979 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15980 if (ShuffV[i] < 0)
15981 continue;
15982 // If element from non-splat is undef, pick first element from splat.
15983 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15984 }
15985 // Example (odd elements from first vector):
15986 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15987 else
15988 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15989 if (ShuffV[i] < 0)
15990 continue;
15991 // If element from non-splat is undef, pick first element from splat.
15992 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15993 }
15994 } else {
15995 // Example (even elements from first vector):
15996 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15997 if (Mask[0] < NumElts)
15998 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15999 if (ShuffV[i] < 0)
16000 continue;
16001 // If element from non-splat is undef, pick first element from splat.
16002 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16003 }
16004 // Example (odd elements from first vector):
16005 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16006 else
16007 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16008 if (ShuffV[i] < 0)
16009 continue;
16010 // If element from non-splat is undef, pick first element from splat.
16011 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16012 }
16013 }
16014
16015 // If the RHS has undefs, we need to remove them since we may have created
16016 // a shuffle that adds those instead of the splat value.
16017 SDValue SplatVal =
16018 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16019 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16020
16021 if (IsLittleEndian)
16022 RHS = TheSplat;
16023 else
16024 LHS = TheSplat;
16025 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16026}
16027
16028SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16029 LSBaseSDNode *LSBase,
16030 DAGCombinerInfo &DCI) const {
16031 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16032 "Not a reverse memop pattern!");
16033
16034 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16035 auto Mask = SVN->getMask();
16036 int i = 0;
16037 auto I = Mask.rbegin();
16038 auto E = Mask.rend();
16039
16040 for (; I != E; ++I) {
16041 if (*I != i)
16042 return false;
16043 i++;
16044 }
16045 return true;
16046 };
16047
16048 SelectionDAG &DAG = DCI.DAG;
16049 EVT VT = SVN->getValueType(0);
16050
16051 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16052 return SDValue();
16053
16054 // Before Power9, the PPCVSXSwapRemoval pass adjusts the element order
16055 // (see the comment in PPCVSXSwapRemoval.cpp). This combine conflicts with
16056 // that optimization, so we don't perform it for those subtargets.
16057 if (!Subtarget.hasP9Vector())
16058 return SDValue();
16059
16060 if (!IsElementReverse(SVN))
16061 return SDValue();
16062
16063 if (LSBase->getOpcode() == ISD::LOAD) {
16064 // If value 0 of the load has any user other than the
16065 // shufflevector instruction, it is not profitable to replace the
16066 // shufflevector with a reverse load.
16067 for (SDUse &Use : LSBase->uses())
16068 if (Use.getResNo() == 0 &&
16069 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16070 return SDValue();
16071
16072 SDLoc dl(LSBase);
16073 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16074 return DAG.getMemIntrinsicNode(
16075 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16076 LSBase->getMemoryVT(), LSBase->getMemOperand());
16077 }
16078
16079 if (LSBase->getOpcode() == ISD::STORE) {
16080 // If there are other uses of the shuffle, the swap cannot be avoided.
16081 // Forcing the use of an X-Form (since swapped stores only have
16082 // X-Forms) without removing the swap is unprofitable.
16083 if (!SVN->hasOneUse())
16084 return SDValue();
16085
16086 SDLoc dl(LSBase);
16087 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16088 LSBase->getBasePtr()};
16089 return DAG.getMemIntrinsicNode(
16090 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16091 LSBase->getMemoryVT(), LSBase->getMemOperand());
16092 }
16093
16094 llvm_unreachable("Expected a load or store node here");
16095}
16096
16097static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16098 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16099 if (IntrinsicID == Intrinsic::ppc_stdcx)
16100 StoreWidth = 8;
16101 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16102 StoreWidth = 4;
16103 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16104 StoreWidth = 2;
16105 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16106 StoreWidth = 1;
16107 else
16108 return false;
16109 return true;
16110}
16111
16112SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16113 DAGCombinerInfo &DCI) const {
16114 SelectionDAG &DAG = DCI.DAG;
16115 SDLoc dl(N);
16116 switch (N->getOpcode()) {
16117 default: break;
16118 case ISD::ADD:
16119 return combineADD(N, DCI);
16120 case ISD::AND: {
16121 // We don't want (and (zext (shift...)), C) if C fits in the width of the
16122 // original input as that will prevent us from selecting optimal rotates.
16123 // This only matters if the input to the extend is i32 widened to i64.
16124 SDValue Op1 = N->getOperand(0);
16125 SDValue Op2 = N->getOperand(1);
16126 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16127 Op1.getOpcode() != ISD::ANY_EXTEND) ||
16128 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
16129 Op1.getOperand(0).getValueType() != MVT::i32)
16130 break;
16131 SDValue NarrowOp = Op1.getOperand(0);
16132 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16133 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16134 break;
16135
16136 uint64_t Imm = Op2->getAsZExtVal();
16137 // Make sure that the constant is narrow enough to fit in the narrow type.
16138 if (!isUInt<32>(Imm))
16139 break;
16140 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
16141 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
16142 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
16143 }
16144 case ISD::SHL:
16145 return combineSHL(N, DCI);
16146 case ISD::SRA:
16147 return combineSRA(N, DCI);
16148 case ISD::SRL:
16149 return combineSRL(N, DCI);
16150 case ISD::MUL:
16151 return combineMUL(N, DCI);
16152 case ISD::FMA:
16153 case PPCISD::FNMSUB:
16154 return combineFMALike(N, DCI);
16155 case PPCISD::SHL:
16156 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
16157 return N->getOperand(0);
16158 break;
16159 case PPCISD::SRL:
16160 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
16161 return N->getOperand(0);
16162 break;
16163 case PPCISD::SRA:
16164 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
16165 if (C->isZero() || // 0 >>s V -> 0.
16166 C->isAllOnes()) // -1 >>s V -> -1.
16167 return N->getOperand(0);
16168 }
16169 break;
16170 case ISD::SIGN_EXTEND:
16171 case ISD::ZERO_EXTEND:
16172 case ISD::ANY_EXTEND:
16173 return DAGCombineExtBoolTrunc(N, DCI);
16174 case ISD::TRUNCATE:
16175 return combineTRUNCATE(N, DCI);
16176 case ISD::SETCC:
16177 if (SDValue CSCC = combineSetCC(N, DCI))
16178 return CSCC;
16179 [[fallthrough]];
16180 case ISD::SELECT_CC:
16181 return DAGCombineTruncBoolExt(N, DCI);
16182 case ISD::SINT_TO_FP:
16183 case ISD::UINT_TO_FP:
16184 return combineFPToIntToFP(N, DCI);
16185 case ISD::VECTOR_SHUFFLE:
16186 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
16187 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
16188 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
16189 }
16190 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
16191 case ISD::STORE: {
16192
16193 EVT Op1VT = N->getOperand(1).getValueType();
16194 unsigned Opcode = N->getOperand(1).getOpcode();
16195
16196 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16197 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16198 SDValue Val = combineStoreFPToInt(N, DCI);
16199 if (Val)
16200 return Val;
16201 }
16202
16203 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16204 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
16205 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
16206 if (Val)
16207 return Val;
16208 }
16209
16210 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
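// For example, a pattern such as
//   store (bswap i32 %val), ptr %p
// is expected to become a single stwbrx (byte-reversed word store) rather
// than a separate byte swap followed by a normal store.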
16211 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
16212 N->getOperand(1).getNode()->hasOneUse() &&
16213 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16214 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16215
16216 // STBRX can only handle simple types, and it makes no sense to store fewer
16217 // than two bytes in byte-reversed order.
16218 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
16219 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16220 break;
16221
16222 SDValue BSwapOp = N->getOperand(1).getOperand(0);
16223 // Do an any-extend to 32-bits if this is a half-word input.
16224 if (BSwapOp.getValueType() == MVT::i16)
16225 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
16226
16227 // If the type of the BSWAP operand is wider than the stored memory width,
16228 // it needs to be shifted right before STBRX.
16229 if (Op1VT.bitsGT(mVT)) {
16230 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16231 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
16232 DAG.getConstant(Shift, dl, MVT::i32));
16233 // Need to truncate if this is a bswap of i64 stored as i32/i16.
16234 if (Op1VT == MVT::i64)
16235 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
16236 }
16237
16238 SDValue Ops[] = {
16239 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
16240 };
16241 return
16242 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
16243 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
16244 cast<StoreSDNode>(N)->getMemOperand());
16245 }
16246
16247 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
16248 // So it can increase the chance of CSE constant construction.
16249 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16250 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
16251 // Need to sign-extend to 64 bits to handle negative values.
16252 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
16253 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
16254 MemVT.getSizeInBits());
16255 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
16256
16257 // DAG.getTruncStore() can't be used here because it doesn't accept
16258 // the general (base + offset) addressing mode.
16259 // So we use UpdateNodeOperands and setTruncatingStore instead.
16260 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
16261 N->getOperand(3));
16262 cast<StoreSDNode>(N)->setTruncatingStore(true);
16263 return SDValue(N, 0);
16264 }
16265
16266 // For little endian, VSX stores require generating xxswapd/lxvd2x.
16267 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16268 if (Op1VT.isSimple()) {
16269 MVT StoreVT = Op1VT.getSimpleVT();
16270 if (Subtarget.needsSwapsForVSXMemOps() &&
16271 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16272 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16273 return expandVSXStoreForLE(N, DCI);
16274 }
16275 break;
16276 }
16277 case ISD::LOAD: {
16278 LoadSDNode *LD = cast<LoadSDNode>(N);
16279 EVT VT = LD->getValueType(0);
16280
16281 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16282 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16283 if (VT.isSimple()) {
16284 MVT LoadVT = VT.getSimpleVT();
16285 if (Subtarget.needsSwapsForVSXMemOps() &&
16286 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16287 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16288 return expandVSXLoadForLE(N, DCI);
16289 }
16290
16291 // We sometimes end up with a 64-bit integer load, from which we extract
16292 // two single-precision floating-point numbers. This happens with
16293 // std::complex<float>, and other similar structures, because of the way we
16294 // canonicalize structure copies. However, if we lack direct moves,
16295 // then the final bitcasts from the extracted integer values to the
16296 // floating-point numbers turn into store/load pairs. Even with direct moves,
16297 // just loading the two floating-point numbers is likely better.
16298 auto ReplaceTwoFloatLoad = [&]() {
16299 if (VT != MVT::i64)
16300 return false;
16301
16302 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16303 LD->isVolatile())
16304 return false;
16305
16306 // We're looking for a sequence like this:
16307 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16308 // t16: i64 = srl t13, Constant:i32<32>
16309 // t17: i32 = truncate t16
16310 // t18: f32 = bitcast t17
16311 // t19: i32 = truncate t13
16312 // t20: f32 = bitcast t19
16313
16314 if (!LD->hasNUsesOfValue(2, 0))
16315 return false;
16316
16317 auto UI = LD->user_begin();
16318 while (UI.getUse().getResNo() != 0) ++UI;
16319 SDNode *Trunc = *UI++;
16320 while (UI.getUse().getResNo() != 0) ++UI;
16321 SDNode *RightShift = *UI;
16322 if (Trunc->getOpcode() != ISD::TRUNCATE)
16323 std::swap(Trunc, RightShift);
16324
16325 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16326 Trunc->getValueType(0) != MVT::i32 ||
16327 !Trunc->hasOneUse())
16328 return false;
16329 if (RightShift->getOpcode() != ISD::SRL ||
16330 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16331 RightShift->getConstantOperandVal(1) != 32 ||
16332 !RightShift->hasOneUse())
16333 return false;
16334
16335 SDNode *Trunc2 = *RightShift->user_begin();
16336 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16337 Trunc2->getValueType(0) != MVT::i32 ||
16338 !Trunc2->hasOneUse())
16339 return false;
16340
16341 SDNode *Bitcast = *Trunc->user_begin();
16342 SDNode *Bitcast2 = *Trunc2->user_begin();
16343
16344 if (Bitcast->getOpcode() != ISD::BITCAST ||
16345 Bitcast->getValueType(0) != MVT::f32)
16346 return false;
16347 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16348 Bitcast2->getValueType(0) != MVT::f32)
16349 return false;
16350
16351 if (Subtarget.isLittleEndian())
16352 std::swap(Bitcast, Bitcast2);
16353
16354 // Bitcast has the second float (in memory-layout order) and Bitcast2
16355 // has the first one.
16356
16357 SDValue BasePtr = LD->getBasePtr();
16358 if (LD->isIndexed()) {
16359 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16360 "Non-pre-inc AM on PPC?");
16361 BasePtr =
16362 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16363 LD->getOffset());
16364 }
16365
16366 auto MMOFlags =
16367 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16368 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16369 LD->getPointerInfo(), LD->getAlign(),
16370 MMOFlags, LD->getAAInfo());
16371 SDValue AddPtr =
16372 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16373 BasePtr, DAG.getIntPtrConstant(4, dl));
16374 SDValue FloatLoad2 = DAG.getLoad(
16375 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16376 LD->getPointerInfo().getWithOffset(4),
16377 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16378
16379 if (LD->isIndexed()) {
16380 // Note that DAGCombine should re-form any pre-increment load(s) from
16381 // what is produced here if that makes sense.
16382 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16383 }
16384
16385 DCI.CombineTo(Bitcast2, FloatLoad);
16386 DCI.CombineTo(Bitcast, FloatLoad2);
16387
16388 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16389 SDValue(FloatLoad2.getNode(), 1));
16390 return true;
16391 };
16392
16393 if (ReplaceTwoFloatLoad())
16394 return SDValue(N, 0);
16395
16396 EVT MemVT = LD->getMemoryVT();
16397 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16398 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16399 if (LD->isUnindexed() && VT.isVector() &&
16400 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16401 // P8 and later hardware should just use LOAD.
16402 !Subtarget.hasP8Vector() &&
16403 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16404 VT == MVT::v4f32))) &&
16405 LD->getAlign() < ABIAlignment) {
16406 // This is a type-legal unaligned Altivec load.
16407 SDValue Chain = LD->getChain();
16408 SDValue Ptr = LD->getBasePtr();
16409 bool isLittleEndian = Subtarget.isLittleEndian();
16410
16411 // This implements the loading of unaligned vectors as described in
16412 // the venerable Apple Velocity Engine overview. Specifically:
16413 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16414 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16415 //
16416 // The general idea is to expand a sequence of one or more unaligned
16417 // loads into an alignment-based permutation-control instruction (lvsl
16418 // or lvsr), a series of regular vector loads (which always truncate
16419 // their input address to an aligned address), and a series of
16420 // permutations. The results of these permutations are the requested
16421 // loaded values. The trick is that the last "extra" load is not taken
16422 // from the address you might suspect (sizeof(vector) bytes after the
16423 // last requested load), but rather sizeof(vector) - 1 bytes after the
16424 // last requested vector. The point of this is to avoid a page fault if
16425 // the base address happened to be aligned. This works because if the
16426 // base address is aligned, then adding less than a full vector length
16427 // will cause the last vector in the sequence to be (re)loaded.
16428 // Otherwise, the next vector will be fetched as you might suspect was
16429 // necessary.
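// For a single unaligned vector load this typically expands to something
// like (big endian shown; little endian uses lvsr and swaps the vperm
// inputs; register names here are only symbolic):
//   lvsl  vPerm, 0, rBase      ; permute control from the low address bits
//   lvx   v1, 0, rBase         ; aligned load covering the start of the data
//   lvx   v2, rOff, rBase      ; rOff = 15, i.e. sizeof(vector) - 1
//   vperm vResult, v1, v2, vPerm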
16430
16431 // We might be able to reuse the permutation generation from
16432 // a different base address offset from this one by an aligned amount.
16433 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16434 // optimization later.
16435 Intrinsic::ID Intr, IntrLD, IntrPerm;
16436 MVT PermCntlTy, PermTy, LDTy;
16437 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16438 : Intrinsic::ppc_altivec_lvsl;
16439 IntrLD = Intrinsic::ppc_altivec_lvx;
16440 IntrPerm = Intrinsic::ppc_altivec_vperm;
16441 PermCntlTy = MVT::v16i8;
16442 PermTy = MVT::v4i32;
16443 LDTy = MVT::v4i32;
16444
16445 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16446
16447 // Create the new MMO for the new base load. It is like the original MMO,
16448 // but represents an area in memory almost twice the vector size centered
16449 // on the original address. If the address is unaligned, we might start
16450 // reading up to (sizeof(vector)-1) bytes below the address of the
16451 // original unaligned load.
16452 MachineFunction &MF = DAG.getMachineFunction();
16453 MachineMemOperand *BaseMMO =
16454 MF.getMachineMemOperand(LD->getMemOperand(),
16455 -(int64_t)MemVT.getStoreSize()+1,
16456 2*MemVT.getStoreSize()-1);
16457
16458 // Create the new base load.
16459 SDValue LDXIntID =
16460 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16461 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16462 SDValue BaseLoad =
16463 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16464 DAG.getVTList(PermTy, MVT::Other),
16465 BaseLoadOps, LDTy, BaseMMO);
16466
16467 // Note that the value of IncOffset (which is provided to the next
16468 // load's pointer info offset value, and thus used to calculate the
16469 // alignment), and the value of IncValue (which is actually used to
16470 // increment the pointer value) are different! This is because we
16471 // require the next load to appear to be aligned, even though it
16472 // is actually offset from the base pointer by a lesser amount.
16473 int IncOffset = VT.getSizeInBits() / 8;
16474 int IncValue = IncOffset;
16475
16476 // Walk (both up and down) the chain looking for another load at the real
16477 // (aligned) offset (the alignment of the other load does not matter in
16478 // this case). If found, then do not use the offset reduction trick, as
16479 // that will prevent the loads from being later combined (as they would
16480 // otherwise be duplicates).
16481 if (!findConsecutiveLoad(LD, DAG))
16482 --IncValue;
16483
16484 SDValue Increment =
16485 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16486 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16487
16488 MachineMemOperand *ExtraMMO =
16489 MF.getMachineMemOperand(LD->getMemOperand(),
16490 1, 2*MemVT.getStoreSize()-1);
16491 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16492 SDValue ExtraLoad =
16493 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16494 DAG.getVTList(PermTy, MVT::Other),
16495 ExtraLoadOps, LDTy, ExtraMMO);
16496
16497 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16498 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16499
16500 // Because vperm has a big-endian bias, we must reverse the order
16501 // of the input vectors and complement the permute control vector
16502 // when generating little endian code. We have already handled the
16503 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16504 // and ExtraLoad here.
16505 SDValue Perm;
16506 if (isLittleEndian)
16507 Perm = BuildIntrinsicOp(IntrPerm,
16508 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16509 else
16510 Perm = BuildIntrinsicOp(IntrPerm,
16511 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16512
16513 if (VT != PermTy)
16514 Perm = Subtarget.hasAltivec()
16515 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16516 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16517 DAG.getTargetConstant(1, dl, MVT::i64));
16518 // second argument is 1 because this rounding
16519 // is always exact.
16520
16521 // The output of the permutation is our loaded result, the TokenFactor is
16522 // our new chain.
16523 DCI.CombineTo(N, Perm, TF);
16524 return SDValue(N, 0);
16525 }
16526 }
16527 break;
16528 case ISD::INTRINSIC_WO_CHAIN: {
16529 bool isLittleEndian = Subtarget.isLittleEndian();
16530 unsigned IID = N->getConstantOperandVal(0);
16531 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16532 : Intrinsic::ppc_altivec_lvsl);
16533 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16534 SDValue Add = N->getOperand(1);
16535
16536 int Bits = 4 /* 16 byte alignment */;
16537
16538 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16539 APInt::getAllOnes(Bits /* alignment */)
16540 .zext(Add.getScalarValueSizeInBits()))) {
16541 SDNode *BasePtr = Add->getOperand(0).getNode();
16542 for (SDNode *U : BasePtr->users()) {
16543 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16544 U->getConstantOperandVal(0) == IID) {
16545 // We've found another LVSL/LVSR, and this address is an aligned
16546 // multiple of that one. The results will be the same, so use the
16547 // one we've just found instead.
16548
16549 return SDValue(U, 0);
16550 }
16551 }
16552 }
16553
16554 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16555 SDNode *BasePtr = Add->getOperand(0).getNode();
16556 for (SDNode *U : BasePtr->users()) {
16557 if (U->getOpcode() == ISD::ADD &&
16558 isa<ConstantSDNode>(U->getOperand(1)) &&
16559 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16560 (1ULL << Bits) ==
16561 0) {
16562 SDNode *OtherAdd = U;
16563 for (SDNode *V : OtherAdd->users()) {
16564 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16565 V->getConstantOperandVal(0) == IID) {
16566 return SDValue(V, 0);
16567 }
16568 }
16569 }
16570 }
16571 }
16572 }
16573
16574 // Combine vmaxsw/h/b(a, negation of a) into abs(a).
16575 // This exposes the vabsduw/h/b opportunity downstream.
16576 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16577 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16578 IID == Intrinsic::ppc_altivec_vmaxsh ||
16579 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16580 SDValue V1 = N->getOperand(1);
16581 SDValue V2 = N->getOperand(2);
16582 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16583 V1.getSimpleValueType() == MVT::v8i16 ||
16584 V1.getSimpleValueType() == MVT::v16i8) &&
16585 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16586 // (0-a, a)
16587 if (V1.getOpcode() == ISD::SUB &&
16588 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16589 V1.getOperand(1) == V2) {
16590 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16591 }
16592 // (a, 0-a)
16593 if (V2.getOpcode() == ISD::SUB &&
16594 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16595 V2.getOperand(1) == V1) {
16596 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16597 }
16598 // (x-y, y-x)
16599 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16600 V1.getOperand(0) == V2.getOperand(1) &&
16601 V1.getOperand(1) == V2.getOperand(0)) {
16602 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16603 }
16604 }
16605 }
16606 }
16607
16608 break;
16609 case ISD::INTRINSIC_W_CHAIN:
16610 switch (N->getConstantOperandVal(1)) {
16611 default:
16612 break;
16613 case Intrinsic::ppc_altivec_vsum4sbs:
16614 case Intrinsic::ppc_altivec_vsum4shs:
16615 case Intrinsic::ppc_altivec_vsum4ubs: {
16616 // These sum-across intrinsics only have a chain due to the side effect
16617 // that they may set the SAT bit. If we know the SAT bit will not be set
16618 // for some inputs, we can replace any uses of their chain with the
16619 // input chain.
16620 if (BuildVectorSDNode *BVN =
16621 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16622 APInt APSplatBits, APSplatUndef;
16623 unsigned SplatBitSize;
16624 bool HasAnyUndefs;
16625 bool BVNIsConstantSplat = BVN->isConstantSplat(
16626 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16627 !Subtarget.isLittleEndian());
16628 // If the constant splat vector is 0, the SAT bit will not be set.
16629 if (BVNIsConstantSplat && APSplatBits == 0)
16630 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16631 }
16632 return SDValue();
16633 }
16634 case Intrinsic::ppc_vsx_lxvw4x:
16635 case Intrinsic::ppc_vsx_lxvd2x:
16636 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16637 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16638 if (Subtarget.needsSwapsForVSXMemOps())
16639 return expandVSXLoadForLE(N, DCI);
16640 break;
16641 }
16642 break;
16643 case ISD::INTRINSIC_VOID:
16644 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16645 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16646 if (Subtarget.needsSwapsForVSXMemOps()) {
16647 switch (N->getConstantOperandVal(1)) {
16648 default:
16649 break;
16650 case Intrinsic::ppc_vsx_stxvw4x:
16651 case Intrinsic::ppc_vsx_stxvd2x:
16652 return expandVSXStoreForLE(N, DCI);
16653 }
16654 }
16655 break;
16656 case ISD::BSWAP: {
16657 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16658 // For subtargets without LDBRX, we can still do better than the default
16659 // expansion even for 64-bit BSWAP (LOAD).
16660 bool Is64BitBswapOn64BitTgt =
16661 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16662 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16663 N->getOperand(0).hasOneUse();
16664 if (IsSingleUseNormalLd &&
16665 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16666 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16667 SDValue Load = N->getOperand(0);
16668 LoadSDNode *LD = cast<LoadSDNode>(Load);
16669 // Create the byte-swapping load.
16670 SDValue Ops[] = {
16671 LD->getChain(), // Chain
16672 LD->getBasePtr(), // Ptr
16673 DAG.getValueType(N->getValueType(0)) // VT
16674 };
16675 SDValue BSLoad =
16676 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16677 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16678 MVT::i64 : MVT::i32, MVT::Other),
16679 Ops, LD->getMemoryVT(), LD->getMemOperand());
16680
16681 // If this is an i16 load, insert the truncate.
16682 SDValue ResVal = BSLoad;
16683 if (N->getValueType(0) == MVT::i16)
16684 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16685
16686 // First, combine the bswap away. This makes the value produced by the
16687 // load dead.
16688 DCI.CombineTo(N, ResVal);
16689
16690 // Next, combine the load away; we give it a bogus result value but a real
16691 // chain result. The result value is dead because the bswap is dead.
16692 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16693
16694 // Return N so it doesn't get rechecked!
16695 return SDValue(N, 0);
16696 }
16697 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16698 // before legalization so that the BUILD_PAIR is handled correctly.
16699 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16700 !IsSingleUseNormalLd)
16701 return SDValue();
16702 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16703
16704 // Can't split volatile or atomic loads.
16705 if (!LD->isSimple())
16706 return SDValue();
16707 SDValue BasePtr = LD->getBasePtr();
16708 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16709 LD->getPointerInfo(), LD->getAlign());
16710 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16711 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16712 DAG.getIntPtrConstant(4, dl));
16713 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16714 LD->getMemOperand(), 4, 4);
16715 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16716 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16717 SDValue Res;
16718 if (Subtarget.isLittleEndian())
16719 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16720 else
16721 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16722 SDValue TF =
16723 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16724 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16725 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16726 return Res;
16727 }
16728 case PPCISD::VCMP:
16729 // If a VCMP_rec node already exists with exactly the same operands as this
16730 // node, use its result instead of this node (VCMP_rec computes both a CR6
16731 // and a normal output).
16732 //
16733 if (!N->getOperand(0).hasOneUse() &&
16734 !N->getOperand(1).hasOneUse() &&
16735 !N->getOperand(2).hasOneUse()) {
16736
16737 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16738 SDNode *VCMPrecNode = nullptr;
16739
16740 SDNode *LHSN = N->getOperand(0).getNode();
16741 for (SDNode *User : LHSN->users())
16742 if (User->getOpcode() == PPCISD::VCMP_rec &&
16743 User->getOperand(1) == N->getOperand(1) &&
16744 User->getOperand(2) == N->getOperand(2) &&
16745 User->getOperand(0) == N->getOperand(0)) {
16746 VCMPrecNode = User;
16747 break;
16748 }
16749
16750 // If there is no VCMP_rec node, or if the flag value has a single use,
16751 // don't transform this.
16752 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16753 break;
16754
16755 // Look at the (necessarily single) use of the flag value. If it has a
16756 // chain, this transformation is more complex. Note that multiple things
16757 // could use the value result, which we should ignore.
16758 SDNode *FlagUser = nullptr;
16759 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16760 FlagUser == nullptr; ++UI) {
16761 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16762 SDNode *User = UI->getUser();
16763 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16764 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16765 FlagUser = User;
16766 break;
16767 }
16768 }
16769 }
16770
16771 // If the user is a MFOCRF instruction, we know this is safe.
16772 // Otherwise we give up for right now.
16773 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16774 return SDValue(VCMPrecNode, 0);
16775 }
16776 break;
16777 case ISD::BR_CC: {
16778 // If this is a branch on an altivec predicate comparison, lower this so
16779 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16780 // lowering is done pre-legalize, because the legalizer lowers the predicate
16781 // compare down to code that is difficult to reassemble.
16782 // This code also handles branches that depend on the result of a store
16783 // conditional.
16784 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16785 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16786
16787 int CompareOpc;
16788 bool isDot;
16789
16790 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16791 break;
16792
16793 // Since we are doing this pre-legalize, the RHS can be a constant of
16794 // arbitrary bitwidth which may cause issues when trying to get the value
16795 // from the underlying APInt.
16796 auto RHSAPInt = RHS->getAsAPIntVal();
16797 if (!RHSAPInt.isIntN(64))
16798 break;
16799
16800 unsigned Val = RHSAPInt.getZExtValue();
16801 auto isImpossibleCompare = [&]() {
16802 // If this is a comparison against something other than 0/1, then we know
16803 // that the condition is never/always true.
16804 if (Val != 0 && Val != 1) {
16805 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16806 return N->getOperand(0);
16807 // Always !=, turn it into an unconditional branch.
16808 return DAG.getNode(ISD::BR, dl, MVT::Other,
16809 N->getOperand(0), N->getOperand(4));
16810 }
16811 return SDValue();
16812 };
16813 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16814 unsigned StoreWidth = 0;
16815 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16816 isStoreConditional(LHS, StoreWidth)) {
16817 if (SDValue Impossible = isImpossibleCompare())
16818 return Impossible;
16819 PPC::Predicate CompOpc;
16820 // eq 0 => ne
16821 // ne 0 => eq
16822 // eq 1 => eq
16823 // ne 1 => ne
16824 if (Val == 0)
16825 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16826 else
16827 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16828
16829 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16830 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16831 auto *MemNode = cast<MemSDNode>(LHS);
16832 SDValue ConstSt = DAG.getMemIntrinsicNode(
16833 PPCISD::STORE_COND, dl,
16834 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16835 MemNode->getMemoryVT(), MemNode->getMemOperand());
16836
16837 SDValue InChain;
16838 // Unchain the branch from the original store conditional.
16839 if (N->getOperand(0) == LHS.getValue(1))
16840 InChain = LHS.getOperand(0);
16841 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16842 SmallVector<SDValue, 4> InChains;
16843 SDValue InTF = N->getOperand(0);
16844 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16845 if (InTF.getOperand(i) != LHS.getValue(1))
16846 InChains.push_back(InTF.getOperand(i));
16847 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16848 }
16849
16850 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16851 DAG.getConstant(CompOpc, dl, MVT::i32),
16852 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16853 ConstSt.getValue(2));
16854 }
16855
16856 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16857 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16858 assert(isDot && "Can't compare against a vector result!");
16859
16860 if (SDValue Impossible = isImpossibleCompare())
16861 return Impossible;
16862
16863 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16864 // Create the PPCISD altivec 'dot' comparison node.
16865 SDValue Ops[] = {
16866 LHS.getOperand(2), // LHS of compare
16867 LHS.getOperand(3), // RHS of compare
16868 DAG.getConstant(CompareOpc, dl, MVT::i32)
16869 };
16870 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16871 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16872
16873 // Unpack the result based on how the target uses it.
16874 PPC::Predicate CompOpc;
16875 switch (LHS.getConstantOperandVal(1)) {
16876 default: // Can't happen, don't crash on invalid number though.
16877 case 0: // Branch on the value of the EQ bit of CR6.
16878 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16879 break;
16880 case 1: // Branch on the inverted value of the EQ bit of CR6.
16881 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16882 break;
16883 case 2: // Branch on the value of the LT bit of CR6.
16884 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16885 break;
16886 case 3: // Branch on the inverted value of the LT bit of CR6.
16887 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16888 break;
16889 }
16890
16891 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16892 DAG.getConstant(CompOpc, dl, MVT::i32),
16893 DAG.getRegister(PPC::CR6, MVT::i32),
16894 N->getOperand(4), CompNode.getValue(1));
16895 }
16896 break;
16897 }
16898 case ISD::BUILD_VECTOR:
16899 return DAGCombineBuildVector(N, DCI);
16900 }
16901
16902 return SDValue();
16903}
16904
16905SDValue
16906PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16907 SelectionDAG &DAG,
16908 SmallVectorImpl<SDNode *> &Created) const {
16909 // fold (sdiv X, pow2)
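// For example, an i32 sdiv by 4 becomes PPCISD::SRA_ADDZE with a shift
// amount of 2, which is expected to select to roughly:
//   srawi rD, rS, 2   ; arithmetic shift; CA holds the rounding correction
//   addze rD, rD      ; add the carry back in to round toward zero
// and a divisor of -4 additionally negates the result.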
16910 EVT VT = N->getValueType(0);
16911 if (VT == MVT::i64 && !Subtarget.isPPC64())
16912 return SDValue();
16913 if ((VT != MVT::i32 && VT != MVT::i64) ||
16914 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16915 return SDValue();
16916
16917 SDLoc DL(N);
16918 SDValue N0 = N->getOperand(0);
16919
16920 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16921 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16922 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16923
16924 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16925 Created.push_back(Op.getNode());
16926
16927 if (IsNegPow2) {
16928 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16929 Created.push_back(Op.getNode());
16930 }
16931
16932 return Op;
16933}
16934
16935//===----------------------------------------------------------------------===//
16936// Inline Assembly Support
16937//===----------------------------------------------------------------------===//
16938
16939void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16940 KnownBits &Known,
16941 const APInt &DemandedElts,
16942 const SelectionDAG &DAG,
16943 unsigned Depth) const {
16944 Known.resetAll();
16945 switch (Op.getOpcode()) {
16946 default: break;
16947 case PPCISD::LBRX: {
16948 // lhbrx is known to have the top bits cleared out.
16949 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16950 Known.Zero = 0xFFFF0000;
16951 break;
16952 }
16953 case ISD::INTRINSIC_WO_CHAIN: {
16954 switch (Op.getConstantOperandVal(0)) {
16955 default: break;
16956 case Intrinsic::ppc_altivec_vcmpbfp_p:
16957 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16958 case Intrinsic::ppc_altivec_vcmpequb_p:
16959 case Intrinsic::ppc_altivec_vcmpequh_p:
16960 case Intrinsic::ppc_altivec_vcmpequw_p:
16961 case Intrinsic::ppc_altivec_vcmpequd_p:
16962 case Intrinsic::ppc_altivec_vcmpequq_p:
16963 case Intrinsic::ppc_altivec_vcmpgefp_p:
16964 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16965 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16966 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16967 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16968 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16969 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16970 case Intrinsic::ppc_altivec_vcmpgtub_p:
16971 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16972 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16973 case Intrinsic::ppc_altivec_vcmpgtud_p:
16974 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16975 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16976 break;
16977 }
16978 break;
16979 }
16980 case ISD::INTRINSIC_W_CHAIN: {
16981 switch (Op.getConstantOperandVal(1)) {
16982 default:
16983 break;
16984 case Intrinsic::ppc_load2r:
16985 // Top bits are cleared for load2r (which is the same as lhbrx).
16986 Known.Zero = 0xFFFF0000;
16987 break;
16988 }
16989 break;
16990 }
16991 }
16992}
16993
16994Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16995 switch (Subtarget.getCPUDirective()) {
16996 default: break;
16997 case PPC::DIR_970:
16998 case PPC::DIR_PWR4:
16999 case PPC::DIR_PWR5:
17000 case PPC::DIR_PWR5X:
17001 case PPC::DIR_PWR6:
17002 case PPC::DIR_PWR6X:
17003 case PPC::DIR_PWR7:
17004 case PPC::DIR_PWR8:
17005 case PPC::DIR_PWR9:
17006 case PPC::DIR_PWR10:
17007 case PPC::DIR_PWR11:
17008 case PPC::DIR_PWR_FUTURE: {
17009 if (!ML)
17010 break;
17011
17012 if (!DisableInnermostLoopAlign32) {
17013 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17014 // so that we can decrease cache misses and branch-prediction misses.
17015 // Actual alignment of the loop will depend on the hotness check and other
17016 // logic in alignBlocks.
17017 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17018 return Align(32);
17019 }
17020
17021 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17022
17023 // For small loops (between 5 and 8 instructions), align to a 32-byte
17024 // boundary so that the entire loop fits in one instruction-cache line.
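 // For instance, a loop of seven 4-byte instructions (28 bytes) satisfies
 // 16 < LoopSize <= 32 below and gets the 32-byte alignment, while a
 // four-instruction loop is left at the default alignment.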
17025 uint64_t LoopSize = 0;
17026 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17027 for (const MachineInstr &J : **I) {
17028 LoopSize += TII->getInstSizeInBytes(J);
17029 if (LoopSize > 32)
17030 break;
17031 }
17032
17033 if (LoopSize > 16 && LoopSize <= 32)
17034 return Align(32);
17035
17036 break;
17037 }
17038 }
17039
17040 return TargetLowering::getPrefLoopAlignment(ML);
17041}
17042
17043/// getConstraintType - Given a constraint, return the type of
17044/// constraint it is for this target.
17045PPCTargetLowering::ConstraintType
17046PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17047 if (Constraint.size() == 1) {
17048 switch (Constraint[0]) {
17049 default: break;
17050 case 'b':
17051 case 'r':
17052 case 'f':
17053 case 'd':
17054 case 'v':
17055 case 'y':
17056 return C_RegisterClass;
17057 case 'Z':
17058 // FIXME: While Z does indicate a memory constraint, it specifically
17059 // indicates an r+r address (used in conjunction with the 'y' modifier
17060 // in the replacement string). Currently, we're forcing the base
17061 // register to be r0 in the asm printer (which is interpreted as zero)
17062 // and forming the complete address in the second register. This is
17063 // suboptimal.
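 // A typical use (illustrative, not taken from this file) pairs "Z" with
 // the "%y" output modifier so the compiler emits the reg+reg address
 // itself:
 //   __asm__("lxvd2x %x0,%y1" : "=wa"(v) : "Z"(*p));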
17064 return C_Memory;
17065 }
17066 } else if (Constraint == "wc") { // individual CR bits.
17067 return C_RegisterClass;
17068 } else if (Constraint == "wa" || Constraint == "wd" ||
17069 Constraint == "wf" || Constraint == "ws" ||
17070 Constraint == "wi" || Constraint == "ww") {
17071 return C_RegisterClass; // VSX registers.
17072 }
17073 return TargetLowering::getConstraintType(Constraint);
17074}
17075
17076/// Examine constraint type and operand type and determine a weight value.
17077/// This object must already have been set up with the operand type
17078/// and the current alternative constraint selected.
17079TargetLowering::ConstraintWeight
17080PPCTargetLowering::getSingleConstraintMatchWeight(
17081 AsmOperandInfo &info, const char *constraint) const {
17082 ConstraintWeight weight = CW_Invalid;
17083 Value *CallOperandVal = info.CallOperandVal;
17084 // If we don't have a value, we can't do a match,
17085 // but allow it at the lowest weight.
17086 if (!CallOperandVal)
17087 return CW_Default;
17088 Type *type = CallOperandVal->getType();
17089
17090 // Look at the constraint type.
17091 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
17092 return CW_Register; // an individual CR bit.
17093 else if ((StringRef(constraint) == "wa" ||
17094 StringRef(constraint) == "wd" ||
17095 StringRef(constraint) == "wf") &&
17096 type->isVectorTy())
17097 return CW_Register;
17098 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
17099 return CW_Register; // just hold 64-bit integers data.
17100 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17101 return CW_Register;
17102 else if (StringRef(constraint) == "ww" && type->isFloatTy())
17103 return CW_Register;
17104
17105 switch (*constraint) {
17106 default:
17107 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17108 break;
17109 case 'b':
17110 if (type->isIntegerTy())
17111 weight = CW_Register;
17112 break;
17113 case 'f':
17114 if (type->isFloatTy())
17115 weight = CW_Register;
17116 break;
17117 case 'd':
17118 if (type->isDoubleTy())
17119 weight = CW_Register;
17120 break;
17121 case 'v':
17122 if (type->isVectorTy())
17123 weight = CW_Register;
17124 break;
17125 case 'y':
17126 weight = CW_Register;
17127 break;
17128 case 'Z':
17129 weight = CW_Memory;
17130 break;
17131 }
17132 return weight;
17133}
17134
17135std::pair<unsigned, const TargetRegisterClass *>
17136PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17137 StringRef Constraint,
17138 MVT VT) const {
17139 if (Constraint.size() == 1) {
17140 // GCC RS6000 Constraint Letters
17141 switch (Constraint[0]) {
17142 case 'b': // R1-R31
17143 if (VT == MVT::i64 && Subtarget.isPPC64())
17144 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
17145 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
17146 case 'r': // R0-R31
17147 if (VT == MVT::i64 && Subtarget.isPPC64())
17148 return std::make_pair(0U, &PPC::G8RCRegClass);
17149 return std::make_pair(0U, &PPC::GPRCRegClass);
17150 // 'd' and 'f' constraints are both defined to be "the floating point
17151 // registers", where one is for 32-bit and the other for 64-bit. We don't
17152 // really care overly much here so just give them all the same reg classes.
17153 case 'd':
17154 case 'f':
17155 if (Subtarget.hasSPE()) {
17156 if (VT == MVT::f32 || VT == MVT::i32)
17157 return std::make_pair(0U, &PPC::GPRCRegClass);
17158 if (VT == MVT::f64 || VT == MVT::i64)
17159 return std::make_pair(0U, &PPC::SPERCRegClass);
17160 } else {
17161 if (VT == MVT::f32 || VT == MVT::i32)
17162 return std::make_pair(0U, &PPC::F4RCRegClass);
17163 if (VT == MVT::f64 || VT == MVT::i64)
17164 return std::make_pair(0U, &PPC::F8RCRegClass);
17165 }
17166 break;
17167 case 'v':
17168 if (Subtarget.hasAltivec() && VT.isVector())
17169 return std::make_pair(0U, &PPC::VRRCRegClass);
17170 else if (Subtarget.hasVSX())
17171 // Scalars in Altivec registers only make sense with VSX.
17172 return std::make_pair(0U, &PPC::VFRCRegClass);
17173 break;
17174 case 'y': // crrc
17175 return std::make_pair(0U, &PPC::CRRCRegClass);
17176 }
17177 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17178 // An individual CR bit.
17179 return std::make_pair(0U, &PPC::CRBITRCRegClass);
17180 } else if ((Constraint == "wa" || Constraint == "wd" ||
17181 Constraint == "wf" || Constraint == "wi") &&
17182 Subtarget.hasVSX()) {
17183 // A VSX register for either a scalar (FP) or vector. There is no
17184 // support for single precision scalars on subtargets prior to Power8.
17185 if (VT.isVector())
17186 return std::make_pair(0U, &PPC::VSRCRegClass);
17187 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17188 return std::make_pair(0U, &PPC::VSSRCRegClass);
17189 return std::make_pair(0U, &PPC::VSFRCRegClass);
17190 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17191 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17192 return std::make_pair(0U, &PPC::VSSRCRegClass);
17193 else
17194 return std::make_pair(0U, &PPC::VSFRCRegClass);
17195 } else if (Constraint == "lr") {
17196 if (VT == MVT::i64)
17197 return std::make_pair(0U, &PPC::LR8RCRegClass);
17198 else
17199 return std::make_pair(0U, &PPC::LRRCRegClass);
17200 }
17201
17202 // Handle special cases of physical registers that are not properly handled
17203 // by the base class.
17204 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17205 // If we name a VSX register, we can't defer to the base class because it
17206 // will not recognize the correct register (their names will be VSL{0-31}
17207 // and V{0-31} so they won't match). So we match them here.
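 // For example, an operand written as "{vs34}" resolves below to PPC::V2 in
 // the VSRC register class, since VSX registers 32-63 overlay the Altivec
 // V0-V31 registers.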
17208 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
17209 int VSNum = atoi(Constraint.data() + 3);
17210 assert(VSNum >= 0 && VSNum <= 63 &&
17211 "Attempted to access a vsr out of range");
17212 if (VSNum < 32)
17213 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
17214 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
17215 }
17216
17217 // For float registers, we can't defer to the base class as it will match
17218 // the SPILLTOVSRRC class.
17219 if (Constraint.size() > 3 && Constraint[1] == 'f') {
17220 int RegNum = atoi(Constraint.data() + 2);
17221 if (RegNum > 31 || RegNum < 0)
17222 report_fatal_error("Invalid floating point register number");
17223 if (VT == MVT::f32 || VT == MVT::i32)
17224 return Subtarget.hasSPE()
17225 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
17226 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
17227 if (VT == MVT::f64 || VT == MVT::i64)
17228 return Subtarget.hasSPE()
17229 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
17230 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
17231 }
17232 }
17233
17234 std::pair<unsigned, const TargetRegisterClass *> R =
17235 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
17236
17237 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17238 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17239 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17240 // register.
17241 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17242 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
17243 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17244 PPC::GPRCRegClass.contains(R.first))
17245 return std::make_pair(TRI->getMatchingSuperReg(R.first,
17246 PPC::sub_32, &PPC::G8RCRegClass),
17247 &PPC::G8RCRegClass);
17248
17249 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17250 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
17251 R.first = PPC::CR0;
17252 R.second = &PPC::CRRCRegClass;
17253 }
17254 // FIXME: This warning should ideally be emitted in the front end.
17255 const auto &TM = getTargetMachine();
17256 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17257 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17258 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17259 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
17260 errs() << "warning: vector registers 20 to 31 are reserved in the "
17261 "default AIX AltiVec ABI and cannot be used\n";
17262 }
17263
17264 return R;
17265}
17266
17267/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17268/// vector. If it is invalid, don't add anything to Ops.
17269void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
17270 StringRef Constraint,
17271 std::vector<SDValue> &Ops,
17272 SelectionDAG &DAG) const {
17273 SDValue Result;
17274
17275 // Only support length 1 constraints.
17276 if (Constraint.size() > 1)
17277 return;
17278
17279 char Letter = Constraint[0];
17280 switch (Letter) {
17281 default: break;
17282 case 'I':
17283 case 'J':
17284 case 'K':
17285 case 'L':
17286 case 'M':
17287 case 'N':
17288 case 'O':
17289 case 'P': {
17290 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
17291 if (!CST) return; // Must be an immediate to match.
17292 SDLoc dl(Op);
17293 int64_t Value = CST->getSExtValue();
17294 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17295 // numbers are printed as such.
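 // As an illustration (hypothetical snippet, not from this file), a
 // constraint such as: asm("addi %0,%1,%2" : "=r"(d) : "r"(s), "I"(42))
 // matches below because 42 fits in a signed 16-bit immediate; a value such
 // as 100000 would leave Result empty and fall through to the generic
 // handling at the end of this function.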
17296 switch (Letter) {
17297 default: llvm_unreachable("Unknown constraint letter!");
17298 case 'I': // "I" is a signed 16-bit constant.
17299 if (isInt<16>(Value))
17300 Result = DAG.getTargetConstant(Value, dl, TCVT);
17301 break;
17302 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17303 if (isShiftedUInt<16, 16>(Value))
17304 Result = DAG.getTargetConstant(Value, dl, TCVT);
17305 break;
17306 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17307 if (isShiftedInt<16, 16>(Value))
17308 Result = DAG.getTargetConstant(Value, dl, TCVT);
17309 break;
17310 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17311 if (isUInt<16>(Value))
17312 Result = DAG.getTargetConstant(Value, dl, TCVT);
17313 break;
17314 case 'M': // "M" is a constant that is greater than 31.
17315 if (Value > 31)
17316 Result = DAG.getTargetConstant(Value, dl, TCVT);
17317 break;
17318 case 'N': // "N" is a positive constant that is an exact power of two.
17319 if (Value > 0 && isPowerOf2_64(Value))
17320 Result = DAG.getTargetConstant(Value, dl, TCVT);
17321 break;
17322 case 'O': // "O" is the constant zero.
17323 if (Value == 0)
17324 Result = DAG.getTargetConstant(Value, dl, TCVT);
17325 break;
17326 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17327 if (isInt<16>(-Value))
17328 Result = DAG.getTargetConstant(Value, dl, TCVT);
17329 break;
17330 }
17331 break;
17332 }
17333 }
17334
17335 if (Result.getNode()) {
17336 Ops.push_back(Result);
17337 return;
17338 }
17339
17340 // Handle standard constraint letters.
17341 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17342}
17343
17344void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17345 SmallVectorImpl<SDValue> &Ops,
17346 SelectionDAG &DAG) const {
17347 if (I.getNumOperands() <= 1)
17348 return;
17349 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17350 return;
17351 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17352 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17353 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17354 return;
17355
17356 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17357 Ops.push_back(DAG.getMDNode(MDN));
17358}
17359
17360// isLegalAddressingMode - Return true if the addressing mode represented
17361// by AM is legal for this target, for a load/store of the specified type.
17362bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17363 const AddrMode &AM, Type *Ty,
17364 unsigned AS,
17365 Instruction *I) const {
17366 // Vector type r+i form is supported since power9 as DQ form. We don't check
17367 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
17368 // imm form is preferred and the offset can be adjusted to use imm form later
17369 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
17370 // max offset to check legal addressing mode, we should be a little aggressive
17371 // to contain other offsets for that LSRUse.
17372 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17373 return false;
17374
17375 // PPC allows a sign-extended 16-bit immediate field.
17376 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17377 return false;
17378
17379 // No global is ever allowed as a base.
17380 if (AM.BaseGV)
17381 return false;
17382
17383 // PPC only supports r+r,
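 // Illustrative cases for the switch below: "reg + 12" (Scale 0) and
 // "reg + reg" (Scale 1) are legal; "reg + reg + 8" is rejected; "2*reg"
 // (Scale 2) is accepted because it can be formed as "reg + reg".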
17384 switch (AM.Scale) {
17385 case 0: // "r+i" or just "i", depending on HasBaseReg.
17386 break;
17387 case 1:
17388 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17389 return false;
17390 // Otherwise we have r+r or r+i.
17391 break;
17392 case 2:
17393 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17394 return false;
17395 // Allow 2*r as r+r.
17396 break;
17397 default:
17398 // No other scales are supported.
17399 return false;
17400 }
17401
17402 return true;
17403}
17404
17405SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17406 SelectionDAG &DAG) const {
17407 MachineFunction &MF = DAG.getMachineFunction();
17408 MachineFrameInfo &MFI = MF.getFrameInfo();
17409 MFI.setReturnAddressIsTaken(true);
17410
17411 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17412 return SDValue();
17413
17414 SDLoc dl(Op);
17415 unsigned Depth = Op.getConstantOperandVal(0);
17416
17417 // Make sure the function does not optimize away the store of the RA to
17418 // the stack.
17419 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17420 FuncInfo->setLRStoreRequired();
17421 auto PtrVT = getPointerTy(MF.getDataLayout());
17422
17423 if (Depth > 0) {
17424 // The link register (return address) is saved in the caller's frame
17425 // not the callee's stack frame. So we must get the caller's frame
17426 // address and load the return address at the LR offset from there.
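 // (Illustrative values: the LR save slot sits at offset 16 of the caller's
 // frame on 64-bit ELF and at offset 4 on 32-bit SVR4; getReturnSaveOffset()
 // below supplies the correct offset for the current ABI.)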
17427 SDValue FrameAddr =
17428 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17429 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17430 SDValue Offset =
17431 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17432 Subtarget.getScalarIntVT());
17433 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17434 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17435 MachinePointerInfo());
17436 }
17437
17438 // Just load the return address off the stack.
17439 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17440 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17441 MachinePointerInfo());
17442}
17443
17444SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17445 SelectionDAG &DAG) const {
17446 SDLoc dl(Op);
17447 unsigned Depth = Op.getConstantOperandVal(0);
17448
17449 MachineFunction &MF = DAG.getMachineFunction();
17450 MachineFrameInfo &MFI = MF.getFrameInfo();
17451 MFI.setFrameAddressIsTaken(true);
17452
17453 EVT PtrVT = getPointerTy(MF.getDataLayout());
17454 bool isPPC64 = PtrVT == MVT::i64;
17455
17456 // Naked functions never have a frame pointer, and so we use r1. For all
17457 // other functions, this decision must be delayed until during PEI.
17458 unsigned FrameReg;
17459 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17460 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17461 else
17462 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17463
17464 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17465 PtrVT);
17466 while (Depth--)
17467 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17468 FrameAddr, MachinePointerInfo());
17469 return FrameAddr;
17470}
17471
17472#define GET_REGISTER_MATCHER
17473#include "PPCGenAsmMatcher.inc"
17474
17475Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17476 const MachineFunction &MF) const {
17477 bool IsPPC64 = Subtarget.isPPC64();
17478
17479 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
17480 if (!Is64Bit && VT != LLT::scalar(32))
17481 report_fatal_error("Invalid register global variable type");
17482
17483 Register Reg = MatchRegisterName(RegName);
17484 if (!Reg)
17485 report_fatal_error(
17486 Twine("Invalid global name register \"" + StringRef(RegName) + "\"."));
17487
17488 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
17489 // Need followup investigation as to why.
17490 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
17491 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
17492 StringRef(RegName) + "\"."));
17493
17494 // Convert GPR to GP8R register for 64bit.
17495 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
17496 Reg = Reg.id() - PPC::R0 + PPC::X0;
17497
17498 return Reg;
17499}
17500
17501bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17502 // The 32-bit SVR4 ABI accesses everything as got-indirect.
17503 if (Subtarget.is32BitELFABI())
17504 return true;
17505
17506 // AIX accesses everything indirectly through the TOC, which is similar to
17507 // the GOT.
17508 if (Subtarget.isAIXABI())
17509 return true;
17510
17511 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17512 // If it is small or large code model, module locals are accessed
17513 // indirectly by loading their address from .toc/.got.
17514 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17515 return true;
17516
17517 // JumpTable and BlockAddress are accessed as got-indirect.
17518 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17519 return true;
17520
17521 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17522 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17523
17524 return false;
17525}
17526
17527bool
17528PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17529 // The PowerPC target isn't yet aware of offsets.
17530 return false;
17531}
17532
17533bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17534 const CallInst &I,
17535 MachineFunction &MF,
17536 unsigned Intrinsic) const {
17537 switch (Intrinsic) {
17538 case Intrinsic::ppc_atomicrmw_xchg_i128:
17539 case Intrinsic::ppc_atomicrmw_add_i128:
17540 case Intrinsic::ppc_atomicrmw_sub_i128:
17541 case Intrinsic::ppc_atomicrmw_nand_i128:
17542 case Intrinsic::ppc_atomicrmw_and_i128:
17543 case Intrinsic::ppc_atomicrmw_or_i128:
17544 case Intrinsic::ppc_atomicrmw_xor_i128:
17545 case Intrinsic::ppc_cmpxchg_i128:
17546 Info.opc = ISD::INTRINSIC_W_CHAIN;
17547 Info.memVT = MVT::i128;
17548 Info.ptrVal = I.getArgOperand(0);
17549 Info.offset = 0;
17550 Info.align = Align(16);
17551 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
17552 MachineMemOperand::MOVolatile;
17553 return true;
17554 case Intrinsic::ppc_atomic_load_i128:
17555 Info.opc = ISD::INTRINSIC_W_CHAIN;
17556 Info.memVT = MVT::i128;
17557 Info.ptrVal = I.getArgOperand(0);
17558 Info.offset = 0;
17559 Info.align = Align(16);
17560 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
17561 return true;
17562 case Intrinsic::ppc_atomic_store_i128:
17563 Info.opc = ISD::INTRINSIC_VOID;
17564 Info.memVT = MVT::i128;
17565 Info.ptrVal = I.getArgOperand(2);
17566 Info.offset = 0;
17567 Info.align = Align(16);
17568 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17569 return true;
17570 case Intrinsic::ppc_altivec_lvx:
17571 case Intrinsic::ppc_altivec_lvxl:
17572 case Intrinsic::ppc_altivec_lvebx:
17573 case Intrinsic::ppc_altivec_lvehx:
17574 case Intrinsic::ppc_altivec_lvewx:
17575 case Intrinsic::ppc_vsx_lxvd2x:
17576 case Intrinsic::ppc_vsx_lxvw4x:
17577 case Intrinsic::ppc_vsx_lxvd2x_be:
17578 case Intrinsic::ppc_vsx_lxvw4x_be:
17579 case Intrinsic::ppc_vsx_lxvl:
17580 case Intrinsic::ppc_vsx_lxvll: {
17581 EVT VT;
17582 switch (Intrinsic) {
17583 case Intrinsic::ppc_altivec_lvebx:
17584 VT = MVT::i8;
17585 break;
17586 case Intrinsic::ppc_altivec_lvehx:
17587 VT = MVT::i16;
17588 break;
17589 case Intrinsic::ppc_altivec_lvewx:
17590 VT = MVT::i32;
17591 break;
17592 case Intrinsic::ppc_vsx_lxvd2x:
17593 case Intrinsic::ppc_vsx_lxvd2x_be:
17594 VT = MVT::v2f64;
17595 break;
17596 default:
17597 VT = MVT::v4i32;
17598 break;
17599 }
17600
17601 Info.opc = ISD::INTRINSIC_W_CHAIN;
17602 Info.memVT = VT;
17603 Info.ptrVal = I.getArgOperand(0);
17604 Info.offset = -VT.getStoreSize()+1;
17605 Info.size = 2*VT.getStoreSize()-1;
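 // The offset/size pair above is a conservative window: lvx-style accesses
 // ignore the low-order address bits and touch the aligned 16-byte block
 // containing the pointer, so the bytes referenced all lie within
 // [ptr - 15, ptr + 15].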
17606 Info.align = Align(1);
17607 Info.flags = MachineMemOperand::MOLoad;
17608 return true;
17609 }
17610 case Intrinsic::ppc_altivec_stvx:
17611 case Intrinsic::ppc_altivec_stvxl:
17612 case Intrinsic::ppc_altivec_stvebx:
17613 case Intrinsic::ppc_altivec_stvehx:
17614 case Intrinsic::ppc_altivec_stvewx:
17615 case Intrinsic::ppc_vsx_stxvd2x:
17616 case Intrinsic::ppc_vsx_stxvw4x:
17617 case Intrinsic::ppc_vsx_stxvd2x_be:
17618 case Intrinsic::ppc_vsx_stxvw4x_be:
17619 case Intrinsic::ppc_vsx_stxvl:
17620 case Intrinsic::ppc_vsx_stxvll: {
17621 EVT VT;
17622 switch (Intrinsic) {
17623 case Intrinsic::ppc_altivec_stvebx:
17624 VT = MVT::i8;
17625 break;
17626 case Intrinsic::ppc_altivec_stvehx:
17627 VT = MVT::i16;
17628 break;
17629 case Intrinsic::ppc_altivec_stvewx:
17630 VT = MVT::i32;
17631 break;
17632 case Intrinsic::ppc_vsx_stxvd2x:
17633 case Intrinsic::ppc_vsx_stxvd2x_be:
17634 VT = MVT::v2f64;
17635 break;
17636 default:
17637 VT = MVT::v4i32;
17638 break;
17639 }
17640
17641 Info.opc = ISD::INTRINSIC_VOID;
17642 Info.memVT = VT;
17643 Info.ptrVal = I.getArgOperand(1);
17644 Info.offset = -VT.getStoreSize()+1;
17645 Info.size = 2*VT.getStoreSize()-1;
17646 Info.align = Align(1);
17647 Info.flags = MachineMemOperand::MOStore;
17648 return true;
17649 }
17650 case Intrinsic::ppc_stdcx:
17651 case Intrinsic::ppc_stwcx:
17652 case Intrinsic::ppc_sthcx:
17653 case Intrinsic::ppc_stbcx: {
17654 EVT VT;
17655 auto Alignment = Align(8);
17656 switch (Intrinsic) {
17657 case Intrinsic::ppc_stdcx:
17658 VT = MVT::i64;
17659 break;
17660 case Intrinsic::ppc_stwcx:
17661 VT = MVT::i32;
17662 Alignment = Align(4);
17663 break;
17664 case Intrinsic::ppc_sthcx:
17665 VT = MVT::i16;
17666 Alignment = Align(2);
17667 break;
17668 case Intrinsic::ppc_stbcx:
17669 VT = MVT::i8;
17670 Alignment = Align(1);
17671 break;
17672 }
17673 Info.opc = ISD::INTRINSIC_W_CHAIN;
17674 Info.memVT = VT;
17675 Info.ptrVal = I.getArgOperand(0);
17676 Info.offset = 0;
17677 Info.align = Alignment;
17678 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17679 return true;
17680 }
17681 default:
17682 break;
17683 }
17684
17685 return false;
17686}
17687
17688/// It returns EVT::Other if the type should be determined using generic
17689/// target-independent logic.
17690EVT PPCTargetLowering::getOptimalMemOpType(
17691 const MemOp &Op, const AttributeList &FuncAttributes) const {
17692 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17693 // We should use Altivec/VSX loads and stores when available. For unaligned
17694 // addresses, unaligned VSX loads are only fast starting with the P8.
17695 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17696 if (Op.isMemset() && Subtarget.hasVSX()) {
17697 uint64_t TailSize = Op.size() % 16;
17698 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
17699 // element if vector element type matches tail store. For tail size
17700 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
17701 if (TailSize > 2 && TailSize <= 4) {
17702 return MVT::v8i16;
17703 }
17704 return MVT::v4i32;
17705 }
17706 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17707 return MVT::v4i32;
17708 }
17709 }
17710
17711 if (Subtarget.isPPC64()) {
17712 return MVT::i64;
17713 }
17714
17715 return MVT::i32;
17716}
17717
17718/// Returns true if it is beneficial to convert a load of a constant
17719/// to just the constant itself.
17720bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17721 Type *Ty) const {
17722 assert(Ty->isIntegerTy());
17723
17724 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17725 return !(BitSize == 0 || BitSize > 64);
17726}
17727
17728bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17729 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17730 return false;
17731 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17732 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17733 return NumBits1 == 64 && NumBits2 == 32;
17734}
17735
17736bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17737 if (!VT1.isInteger() || !VT2.isInteger())
17738 return false;
17739 unsigned NumBits1 = VT1.getSizeInBits();
17740 unsigned NumBits2 = VT2.getSizeInBits();
17741 return NumBits1 == 64 && NumBits2 == 32;
17742}
17743
17744bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17745 // Generally speaking, zexts are not free, but they are free when they can be
17746 // folded with other operations.
17747 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17748 EVT MemVT = LD->getMemoryVT();
17749 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17750 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17751 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17752 LD->getExtensionType() == ISD::ZEXTLOAD))
17753 return true;
17754 }
17755
17756 // FIXME: Add other cases...
17757 // - 32-bit shifts with a zext to i64
17758 // - zext after ctlz, bswap, etc.
17759 // - zext after and by a constant mask
17760
17761 return TargetLowering::isZExtFree(Val, VT2);
17762}
17763
17764bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17765 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17766 "invalid fpext types");
17767 // Extending to float128 is not free.
17768 if (DestVT == MVT::f128)
17769 return false;
17770 return true;
17771}
17772
17773bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17774 return isInt<16>(Imm) || isUInt<16>(Imm);
17775}
17776
17777bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17778 return isInt<16>(Imm) || isUInt<16>(Imm);
17779}
17780
17781bool PPCTargetLowering::allowsMisalignedMemoryAccesses(
17782 EVT VT, unsigned, Align, MachineMemOperand::Flags,
17783 unsigned *Fast) const {
17784 if (DisablePPCUnaligned)
17785 return false;
17786
17787 // PowerPC supports unaligned memory access for simple non-vector types.
17788 // Although accessing unaligned addresses is not as efficient as accessing
17789 // aligned addresses, it is generally more efficient than manual expansion,
17790 // and generally only traps for software emulation when crossing page
17791 // boundaries.
17792
17793 if (!VT.isSimple())
17794 return false;
17795
17796 if (VT.isFloatingPoint() && !VT.isVector() &&
17797 !Subtarget.allowsUnalignedFPAccess())
17798 return false;
17799
17800 if (VT.getSimpleVT().isVector()) {
17801 if (Subtarget.hasVSX()) {
17802 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17803 VT != MVT::v4f32 && VT != MVT::v4i32)
17804 return false;
17805 } else {
17806 return false;
17807 }
17808 }
17809
17810 if (VT == MVT::ppcf128)
17811 return false;
17812
17813 if (Fast)
17814 *Fast = 1;
17815
17816 return true;
17817}
17818
17819bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17820 SDValue C) const {
17821 // Check integral scalar types.
17822 if (!VT.isScalarInteger())
17823 return false;
17824 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17825 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17826 return false;
17827 // This transformation will generate >= 2 operations. But the following
17828 // cases will generate <= 2 instructions during ISEL. So exclude them.
17829 // 1. If the constant multiplier fits 16 bits, it can be handled by one
17830 // HW instruction, ie. MULLI
17831 // 2. If the multiplier fits in 16 bits after shifting out its trailing zeros,
17832 // only one extra shift instruction is needed compared to case 1, ie. MULLI and RLDICR
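 // Worked example (illustrative): 0x11000 is rejected below because
 // 0x11000 >> 12 == 0x11 still fits in 16 bits (MULLI plus one shift),
 // whereas 65537 (2^16 + 1) does not fit and is one more than a power of
 // two, so we return true and let the combiner use shift-and-add instead.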
17833 int64_t Imm = ConstNode->getSExtValue();
17834 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17835 Imm >>= Shift;
17836 if (isInt<16>(Imm))
17837 return false;
17838 uint64_t UImm = static_cast<uint64_t>(Imm);
17839 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17840 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17841 return true;
17842 }
17843 return false;
17844}
17845
17846bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17847 EVT VT) const {
17848 return isFMAFasterThanFMulAndFAdd(
17849 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17850}
17851
17852bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17853 Type *Ty) const {
17854 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17855 return false;
17856 switch (Ty->getScalarType()->getTypeID()) {
17857 case Type::FloatTyID:
17858 case Type::DoubleTyID:
17859 return true;
17860 case Type::FP128TyID:
17861 return Subtarget.hasP9Vector();
17862 default:
17863 return false;
17864 }
17865}
17866
17867// FIXME: add more patterns which are not profitable to hoist.
17868bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17869 if (!I->hasOneUse())
17870 return true;
17871
17872 Instruction *User = I->user_back();
17873 assert(User && "A single use instruction with no uses.");
17874
17875 switch (I->getOpcode()) {
17876 case Instruction::FMul: {
17877 // Don't break FMA, PowerPC prefers FMA.
17878 if (User->getOpcode() != Instruction::FSub &&
17879 User->getOpcode() != Instruction::FAdd)
17880 return true;
17881
17882 const TargetOptions &Options = getTargetMachine().Options;
17883 const Function *F = I->getFunction();
17884 const DataLayout &DL = F->getDataLayout();
17885 Type *Ty = User->getOperand(0)->getType();
17886
17887 return !(
17888 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17889 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17890 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17891 }
17892 case Instruction::Load: {
17893 // Don't break "store (load float*)" pattern, this pattern will be combined
17894 // to "store (load int32)" in later InstCombine pass. See function
17895 // combineLoadToOperationType. On PowerPC, loading a floating-point value takes more
17896 // cycles than loading a 32 bit integer.
17897 LoadInst *LI = cast<LoadInst>(I);
17898 // For the loads that combineLoadToOperationType does nothing, like
17899 // ordered load, it should be profitable to hoist them.
17900 // For swifterror load, it can only be used for pointer to pointer type, so
17901 // later type check should get rid of this case.
17902 if (!LI->isUnordered())
17903 return true;
17904
17905 if (User->getOpcode() != Instruction::Store)
17906 return true;
17907
17908 if (I->getType()->getTypeID() != Type::FloatTyID)
17909 return true;
17910
17911 return false;
17912 }
17913 default:
17914 return true;
17915 }
17916 return true;
17917}
17918
17919const MCPhysReg *
17920PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17921 // LR is a callee-save register, but we must treat it as clobbered by any call
17922 // site. Hence we include LR in the scratch registers, which are in turn added
17923 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17924 // to CTR, which is used by any indirect call.
17925 static const MCPhysReg ScratchRegs[] = {
17926 PPC::X12, PPC::LR8, PPC::CTR8, 0
17927 };
17928
17929 return ScratchRegs;
17930}
17931
17932Register PPCTargetLowering::getExceptionPointerRegister(
17933 const Constant *PersonalityFn) const {
17934 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17935}
17936
17937Register PPCTargetLowering::getExceptionSelectorRegister(
17938 const Constant *PersonalityFn) const {
17939 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17940}
17941
17942bool
17943PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17944 EVT VT, unsigned DefinedValues) const {
17945 if (VT == MVT::v2i64)
17946 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17947
17948 if (Subtarget.hasVSX())
17949 return true;
17950
17951 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17952}
17953
17954Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17955 if (DisableILPPref || Subtarget.enableMachineScheduler())
17956 return TargetLowering::getSchedulingPreference(N);
17957
17958 return Sched::ILP;
17959}
17960
17961// Create a fast isel object.
17962FastISel *
17963PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17964 const TargetLibraryInfo *LibInfo) const {
17965 return PPC::createFastISel(FuncInfo, LibInfo);
17966}
17967
17968// 'Inverted' means the FMA opcode after negating one multiplicand.
17969// For example, (fma -a b c) = (fnmsub a b c)
17970static unsigned invertFMAOpcode(unsigned Opc) {
17971 switch (Opc) {
17972 default:
17973 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17974 case ISD::FMA:
17975 return PPCISD::FNMSUB;
17976 case PPCISD::FNMSUB:
17977 return ISD::FMA;
17978 }
17979}
17980
17981SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17982 bool LegalOps, bool OptForSize,
17983 NegatibleCost &Cost,
17984 unsigned Depth) const {
17985 if (Depth > SelectionDAG::MaxRecursionDepth)
17986 return SDValue();
17987
17988 unsigned Opc = Op.getOpcode();
17989 EVT VT = Op.getValueType();
17990 SDNodeFlags Flags = Op.getNode()->getFlags();
17991
17992 switch (Opc) {
17993 case PPCISD::FNMSUB:
17994 if (!Op.hasOneUse() || !isTypeLegal(VT))
17995 break;
17996
17997 const TargetOptions &Options = getTargetMachine().Options;
17998 SDValue N0 = Op.getOperand(0);
17999 SDValue N1 = Op.getOperand(1);
18000 SDValue N2 = Op.getOperand(2);
18001 SDLoc Loc(Op);
18002
18003 NegatibleCost N2Cost = NegatibleCost::Expensive;
18004 SDValue NegN2 =
18005 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18006
18007 if (!NegN2)
18008 return SDValue();
18009
18010 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18011 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18012 // These transformations may change sign of zeroes. For example,
18013 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18014 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18015 // Try and choose the cheaper one to negate.
18016 NegatibleCost N0Cost = NegatibleCost::Expensive;
18017 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18018 N0Cost, Depth + 1);
18019
18020 NegatibleCost N1Cost = NegatibleCost::Expensive;
18021 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18022 N1Cost, Depth + 1);
18023
18024 if (NegN0 && N0Cost <= N1Cost) {
18025 Cost = std::min(N0Cost, N2Cost);
18026 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18027 } else if (NegN1) {
18028 Cost = std::min(N1Cost, N2Cost);
18029 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18030 }
18031 }
18032
18033 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18034 if (isOperationLegal(ISD::FMA, VT)) {
18035 Cost = N2Cost;
18036 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18037 }
18038
18039 break;
18040 }
18041
18042 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18043 Cost, Depth);
18044}
18045
18046// Override to enable LOAD_STACK_GUARD lowering on Linux.
18047bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
18048 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18049 return true;
18050 return TargetLowering::useLoadStackGuardNode(M);
18051}
18052
18053// Override to disable global variable loading on Linux and insert AIX canary
18054// word declaration.
18055void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
18056 if (Subtarget.isAIXABI()) {
18057 M.getOrInsertGlobal(AIXSSPCanaryWordName,
18058 PointerType::getUnqual(M.getContext()));
18059 return;
18060 }
18061 if (!Subtarget.isTargetLinux())
18062 return TargetLowering::insertSSPDeclarations(M);
18063}
18064
18065Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
18066 if (Subtarget.isAIXABI())
18067 return M.getGlobalVariable(AIXSSPCanaryWordName);
18068 return TargetLowering::getSDagStackGuard(M);
18069}
18070
18071bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
18072 bool ForCodeSize) const {
18073 if (!VT.isSimple() || !Subtarget.hasVSX())
18074 return false;
18075
18076 switch(VT.getSimpleVT().SimpleTy) {
18077 default:
18078 // For FP types that are currently not supported by PPC backend, return
18079 // false. Examples: f16, f80.
18080 return false;
18081 case MVT::f32:
18082 case MVT::f64: {
18083 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18084 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18085 return true;
18086 }
18087 bool IsExact;
18088 APSInt IntResult(16, false);
18089 // The rounding mode doesn't really matter because we only care about floats
18090 // that can be converted to integers exactly.
18091 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18092 // For exact values in the range [-16, 15] we can materialize the float.
18093 if (IsExact && IntResult <= 15 && IntResult >= -16)
18094 return true;
18095 return Imm.isZero();
18096 }
18097 case MVT::ppcf128:
18098 return Imm.isPosZero();
18099 }
18100}
18101
18102// For vector shift operation op, fold
18103// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
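// For example, with v4i32 elements the mask is 31, so
// (srl x, (and y, 31)) becomes (PPCISD::SRL x, y): the hardware vector
// shift already interprets the amount modulo the element width, making the
// explicit AND redundant.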
18104static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18105 SelectionDAG &DAG) {
18106 SDValue N0 = N->getOperand(0);
18107 SDValue N1 = N->getOperand(1);
18108 EVT VT = N0.getValueType();
18109 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18110 unsigned Opcode = N->getOpcode();
18111 unsigned TargetOpcode;
18112
18113 switch (Opcode) {
18114 default:
18115 llvm_unreachable("Unexpected shift operation");
18116 case ISD::SHL:
18117 TargetOpcode = PPCISD::SHL;
18118 break;
18119 case ISD::SRL:
18120 TargetOpcode = PPCISD::SRL;
18121 break;
18122 case ISD::SRA:
18123 TargetOpcode = PPCISD::SRA;
18124 break;
18125 }
18126
18127 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
18128 N1->getOpcode() == ISD::AND)
18129 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
18130 if (Mask->getZExtValue() == OpSizeInBits - 1)
18131 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
18132
18133 return SDValue();
18134}
18135
18136SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
18137 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18138 return Value;
18139
18140 SDValue N0 = N->getOperand(0);
18141 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18142 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
18143 N0.getOpcode() != ISD::SIGN_EXTEND ||
18144 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
18145 N->getValueType(0) != MVT::i64)
18146 return SDValue();
18147
18148 // We can't save an operation here if the value is already extended, and
18149 // the existing shift is easier to combine.
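 // (PPCISD::EXTSWSLI corresponds to the ISA 3.0 extswsli instruction, which
 // sign-extends a word and shifts it left by an immediate in one operation,
 // avoiding a separate extsw.)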
18150 SDValue ExtsSrc = N0.getOperand(0);
18151 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
18152 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
18153 return SDValue();
18154
18155 SDLoc DL(N0);
18156 SDValue ShiftBy = SDValue(CN1, 0);
18157 // We want the shift amount to be i32 on the extswli, but the shift could
18158 // have an i64.
18159 if (ShiftBy.getValueType() == MVT::i64)
18160 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
18161
18162 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
18163 ShiftBy);
18164}
18165
18166SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18167 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18168 return Value;
18169
18170 return SDValue();
18171}
18172
18173SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18174 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18175 return Value;
18176
18177 return SDValue();
18178}
18179
18180// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18181// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
18182// When C is zero, the equation (addi Z, -C) can be simplified to Z
18183// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
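// Illustration for C == 0: "add X, (zext (setne Z, 0))" becomes
// "addze X, (addic Z, -1).carry"; addic Z, -1 produces a carry exactly when
// Z is nonzero, and addze folds that carry back into X.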
18184static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
18185 const PPCSubtarget &Subtarget) {
18186 if (!Subtarget.isPPC64())
18187 return SDValue();
18188
18189 SDValue LHS = N->getOperand(0);
18190 SDValue RHS = N->getOperand(1);
18191
18192 auto isZextOfCompareWithConstant = [](SDValue Op) {
18193 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18194 Op.getValueType() != MVT::i64)
18195 return false;
18196
18197 SDValue Cmp = Op.getOperand(0);
18198 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18199 Cmp.getOperand(0).getValueType() != MVT::i64)
18200 return false;
18201
18202 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
18203 int64_t NegConstant = 0 - Constant->getSExtValue();
18204 // Due to the limitations of the addi instruction,
18205 // -C is required to be [-32768, 32767].
18206 return isInt<16>(NegConstant);
18207 }
18208
18209 return false;
18210 };
18211
18212 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18213 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18214
18215 // If there is a pattern, canonicalize a zext operand to the RHS.
18216 if (LHSHasPattern && !RHSHasPattern)
18217 std::swap(LHS, RHS);
18218 else if (!LHSHasPattern && !RHSHasPattern)
18219 return SDValue();
18220
18221 SDLoc DL(N);
18222 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
18223 SDValue Cmp = RHS.getOperand(0);
18224 SDValue Z = Cmp.getOperand(0);
18225 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
18226 int64_t NegConstant = 0 - Constant->getSExtValue();
18227
18228 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
18229 default: break;
18230 case ISD::SETNE: {
18231 // when C == 0
18232 // --> addze X, (addic Z, -1).carry
18233 // /
18234 // add X, (zext(setne Z, C))--
18235 // \ when -32768 <= -C <= 32767 && C != 0
18236 // --> addze X, (addic (addi Z, -C), -1).carry
18237 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18238 DAG.getConstant(NegConstant, DL, MVT::i64));
18239 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18240 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18241 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
18242 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18243 SDValue(Addc.getNode(), 1));
18244 }
18245 case ISD::SETEQ: {
18246 // when C == 0
18247 // --> addze X, (subfic Z, 0).carry
18248 // /
18249 // add X, (zext(sete Z, C))--
18250 // \ when -32768 <= -C <= 32767 && C != 0
18251 // --> addze X, (subfic (addi Z, -C), 0).carry
18252 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18253 DAG.getConstant(NegConstant, DL, MVT::i64));
18254 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18255 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18256 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
18257 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18258 SDValue(Subc.getNode(), 1));
18259 }
18260 }
18261
18262 return SDValue();
18263}
18264
18265// Transform
18266// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18267// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18268// In this case both C1 and C2 must be known constants.
18269// C1+C2 must fit into a 34 bit signed integer.
18270static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
18271 const PPCSubtarget &Subtarget) {
18272 if (!Subtarget.isUsingPCRelativeCalls())
18273 return SDValue();
18274
18275 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
18276 // If we find that node try to cast the Global Address and the Constant.
18277 SDValue LHS = N->getOperand(0);
18278 SDValue RHS = N->getOperand(1);
18279
18280 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18281 std::swap(LHS, RHS);
18282
18283 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18284 return SDValue();
18285
18286 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
18287 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
18288 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
18289
18290 // Check that both casts succeeded.
18291 if (!GSDN || !ConstNode)
18292 return SDValue();
18293
18294 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
18295 SDLoc DL(GSDN);
18296
18297 // The signed int offset needs to fit in 34 bits.
18298 if (!isInt<34>(NewOffset))
18299 return SDValue();
18300
18301 // The new global address is a copy of the old global address except
18302 // that it has the updated Offset.
18303 SDValue GA =
18304 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
18305 NewOffset, GSDN->getTargetFlags());
18306 SDValue MatPCRel =
18307 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
18308 return MatPCRel;
18309}
18310
18311SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
18312 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
18313 return Value;
18314
18315 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
18316 return Value;
18317
18318 return SDValue();
18319}
18320
18321// Detect TRUNCATE operations on bitcasts of float128 values.
18322// What we are looking for here is the situation where we extract a subset
18323// of bits from a 128 bit float.
18324// This can be of two forms:
18325// 1) BITCAST of f128 feeding TRUNCATE
18326// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
18327// The reason this is required is because we do not have a legal i128 type
18328// and so we want to prevent having to store the f128 and then reload part
18329// of it.
18330SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18331 DAGCombinerInfo &DCI) const {
18332 // If we are using CRBits then try that first.
18333 if (Subtarget.useCRBits()) {
18334 // Check if CRBits did anything and return that if it did.
18335 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18336 return CRTruncValue;
18337 }
18338
18339 SDLoc dl(N);
18340 SDValue Op0 = N->getOperand(0);
18341
18342 // Looking for a truncate of i128 to i64.
18343 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18344 return SDValue();
18345
18346 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18347
18348 // SRL feeding TRUNCATE.
18349 if (Op0.getOpcode() == ISD::SRL) {
18350 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18351 // The right shift has to be by 64 bits.
18352 if (!ConstNode || ConstNode->getZExtValue() != 64)
18353 return SDValue();
18354
18355 // Switch the element number to extract.
18356 EltToExtract = EltToExtract ? 0 : 1;
18357 // Update Op0 past the SRL.
18358 Op0 = Op0.getOperand(0);
18359 }
18360
18361 // BITCAST feeding a TRUNCATE possibly via SRL.
18362 if (Op0.getOpcode() == ISD::BITCAST &&
18363 Op0.getValueType() == MVT::i128 &&
18364 Op0.getOperand(0).getValueType() == MVT::f128) {
18365 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18366 return DCI.DAG.getNode(
18367 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18368 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18369 }
18370 return SDValue();
18371}
18372
18373SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18374 SelectionDAG &DAG = DCI.DAG;
18375
18376 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18377 if (!ConstOpOrElement)
18378 return SDValue();
18379
18380 // An imul is usually smaller than the alternative sequence for legal type.
18381 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18382 isOperationLegal(ISD::MUL, N->getValueType(0)))
18383 return SDValue();
18384
18385 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18386 switch (this->Subtarget.getCPUDirective()) {
18387 default:
18388 // TODO: enhance the condition for subtarget before pwr8
18389 return false;
18390 case PPC::DIR_PWR8:
18391 // type mul add shl
18392 // scalar 4 1 1
18393 // vector 7 2 2
18394 return true;
18395 case PPC::DIR_PWR9:
18396 case PPC::DIR_PWR10:
18397 case PPC::DIR_PWR11:
18398 case PPC::DIR_PWR_FUTURE:
18399 // type mul add shl
18400 // scalar 5 2 2
18401 // vector 7 2 2
18402
18403 // The cycle ratios of the related operations are shown in the table above.
18404 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
18405 // scalar and vector type. For 2 instrs patterns, add/sub + shl
18406 // are 4, it is always profitable; but for 3 instrs patterns
18407 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
18408 // So we should only do it for vector type.
18409 return IsAddOne && IsNeg ? VT.isVector() : true;
18410 }
18411 };
18412
18413 EVT VT = N->getValueType(0);
18414 SDLoc DL(N);
18415
18416 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18417 bool IsNeg = MulAmt.isNegative();
18418 APInt MulAmtAbs = MulAmt.abs();
18419
18420 if ((MulAmtAbs - 1).isPowerOf2()) {
18421 // (mul x, 2^N + 1) => (add (shl x, N), x)
18422 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18423
18424 if (!IsProfitable(IsNeg, true, VT))
18425 return SDValue();
18426
18427 SDValue Op0 = N->getOperand(0);
18428 SDValue Op1 =
18429 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18430 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18431 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18432
18433 if (!IsNeg)
18434 return Res;
18435
18436 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18437 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18438 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18439 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
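 // e.g. (mul x, 7) becomes (sub (shl x, 3), x), and (mul x, -7) becomes
 // (sub x, (shl x, 3)).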
18440
18441 if (!IsProfitable(IsNeg, false, VT))
18442 return SDValue();
18443
18444 SDValue Op0 = N->getOperand(0);
18445 SDValue Op1 =
18446 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18447 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18448
18449 if (!IsNeg)
18450 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18451 else
18452 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18453
18454 } else {
18455 return SDValue();
18456 }
18457}
18458
18459// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18460// in combiner since we need to check SD flags and other subtarget features.
18461SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18462 DAGCombinerInfo &DCI) const {
18463 SDValue N0 = N->getOperand(0);
18464 SDValue N1 = N->getOperand(1);
18465 SDValue N2 = N->getOperand(2);
18466 SDNodeFlags Flags = N->getFlags();
18467 EVT VT = N->getValueType(0);
18468 SelectionDAG &DAG = DCI.DAG;
18469 const TargetOptions &Options = getTargetMachine().Options;
18470 unsigned Opc = N->getOpcode();
18471 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18472 bool LegalOps = !DCI.isBeforeLegalizeOps();
18473 SDLoc Loc(N);
18474
18475 if (!isOperationLegal(ISD::FMA, VT))
18476 return SDValue();
18477
18478 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18479 // since (fnmsub a b c)=-0 while c-ab=+0.
18480 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18481 return SDValue();
18482
18483 // (fma (fneg a) b c) => (fnmsub a b c)
18484 // (fnmsub (fneg a) b c) => (fma a b c)
18485 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18486 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18487
18488 // (fma a (fneg b) c) => (fnmsub a b c)
18489 // (fnmsub a (fneg b) c) => (fma a b c)
18490 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18491 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18492
18493 return SDValue();
18494}
18495
18496bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18497 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
18498 if (!Subtarget.is64BitELFABI())
18499 return false;
18500
18501 // If not a tail call then no need to proceed.
18502 if (!CI->isTailCall())
18503 return false;
18504
18505 // If sibling calls have been disabled and tail-calls aren't guaranteed
18506 // there is no reason to duplicate.
18507 auto &TM = getTargetMachine();
18508 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18509 return false;
18510
18511 // Can't tail call a function called indirectly, or if it has variadic args.
18512 const Function *Callee = CI->getCalledFunction();
18513 if (!Callee || Callee->isVarArg())
18514 return false;
18515
18516 // Make sure the callee and caller calling conventions are eligible for tco.
18517 const Function *Caller = CI->getParent()->getParent();
18518 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18519 CI->getCallingConv()))
18520 return false;
18521
18522 // If the function is local then we have a good chance at tail-calling it
18523 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18524}
18525
18526bool PPCTargetLowering::
18527isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18528 const Value *Mask = AndI.getOperand(1);
18529 // If the mask is suitable for andi. or andis. we should sink the and.
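 // For example, 0x00FF fits andi. and 0x00FF0000 fits andis. (only bits in
 // the upper halfword set), so both return true below; a mask such as
 // 0x00FF00FF needs two instructions and returns false.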
18530 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18531 // Can't handle constants wider than 64-bits.
18532 if (CI->getBitWidth() > 64)
18533 return false;
18534 int64_t ConstVal = CI->getZExtValue();
18535 return isUInt<16>(ConstVal) ||
18536 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18537 }
18538
18539 // For non-constant masks, we can always use the record-form and.
18540 return true;
18541}
18542
18543/// getAddrModeForFlags - Based on the set of address flags, select the most
18544/// optimal instruction format to match by.
18545PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18546 // This is not a node we should be handling here.
18547 if (Flags == PPC::MOF_None)
18548 return PPC::AM_None;
18549 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18550 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18551 if ((Flags & FlagSet) == FlagSet)
18552 return PPC::AM_DForm;
18553 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18554 if ((Flags & FlagSet) == FlagSet)
18555 return PPC::AM_DSForm;
18556 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18557 if ((Flags & FlagSet) == FlagSet)
18558 return PPC::AM_DQForm;
18559 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18560 if ((Flags & FlagSet) == FlagSet)
18561 return PPC::AM_PrefixDForm;
18562 // If no other forms are selected, return an X-Form as it is the most
18563 // general addressing mode.
18564 return PPC::AM_XForm;
18565}
18566
18567/// Set alignment flags based on whether or not the Frame Index is aligned.
18568/// Utilized when computing flags for address computation when selecting
18569/// load and store instructions.
18570static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18571 SelectionDAG &DAG) {
18572 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18573 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18574 if (!FI)
18575 return;
18576 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18577 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18578 // If this is (add $FI, $S16Imm), the alignment flags are already set
18579 // based on the immediate. We just need to clear the alignment flags
18580 // if the FI alignment is weaker.
18581 if ((FrameIndexAlign % 4) != 0)
18582 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18583 if ((FrameIndexAlign % 16) != 0)
18584 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18585 // If the address is a plain FrameIndex, set alignment flags based on
18586 // FI alignment.
18587 if (!IsAdd) {
18588 if ((FrameIndexAlign % 4) == 0)
18589 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18590 if ((FrameIndexAlign % 16) == 0)
18591 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18592 }
18593}
18594
18595/// Given a node, compute flags that are used for address computation when
18596/// selecting load and store instructions. The flags computed are stored in
18597/// FlagSet. This function takes into account whether the node is a constant,
18598/// an ADD, an OR, or something else, and computes the address flags accordingly.
18599static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18600 SelectionDAG &DAG) {
18601 // Set the alignment flags for the node depending on if the node is
18602 // 4-byte or 16-byte aligned.
18603 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18604 if ((Imm & 0x3) == 0)
18605 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18606 if ((Imm & 0xf) == 0)
18607 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18608 };
18609
18610 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18611 // All 32-bit constants can be computed as LIS + Disp.
18612 const APInt &ConstImm = CN->getAPIntValue();
18613 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18614 FlagSet |= PPC::MOF_AddrIsSImm32;
18615 SetAlignFlagsForImm(ConstImm.getZExtValue());
18616 setAlignFlagsForFI(N, FlagSet, DAG);
18617 }
18618 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18619 FlagSet |= PPC::MOF_RPlusSImm34;
18620 else // Let constant materialization handle large constants.
18621 FlagSet |= PPC::MOF_NotAddNorCst;
18622 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18623 // This address can be represented as an addition of:
18624 // - Register + Imm16 (possibly a multiple of 4/16)
18625 // - Register + Imm34
18626 // - Register + PPCISD::Lo
18627 // - Register + Register
18628 // In any case, we won't have to match this as Base + Zero.
18629 SDValue RHS = N.getOperand(1);
18630 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18631 const APInt &ConstImm = CN->getAPIntValue();
18632 if (ConstImm.isSignedIntN(16)) {
18633 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18634 SetAlignFlagsForImm(ConstImm.getZExtValue());
18635 setAlignFlagsForFI(N, FlagSet, DAG);
18636 }
18637 if (ConstImm.isSignedIntN(34))
18638 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18639 else
18640 FlagSet |= PPC::MOF_RPlusR; // Register.
18641 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18642 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18643 else
18644 FlagSet |= PPC::MOF_RPlusR;
18645 } else { // The address computation is not a constant or an addition.
18646 setAlignFlagsForFI(N, FlagSet, DAG);
18647 FlagSet |= PPC::MOF_NotAddNorCst;
18648 }
18649}
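// A few illustrative inputs for the computation above (sketch only; operand
// names and values are invented): for (add %reg, 48) the immediate fits in 16
// bits and is a multiple of 4 and 16, so MOF_RPlusSImm16, MOF_RPlusSImm16Mult4,
// MOF_RPlusSImm16Mult16 and MOF_RPlusSImm34 are all set. For (add %reg1, %reg2)
// only MOF_RPlusR is set. A lone FrameIndex is neither an add nor a constant,
// so it gets MOF_NotAddNorCst plus any alignment flags from the frame object.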
18650
18651static bool isPCRelNode(SDValue N) {
18652 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18653 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18654 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18655 isValidPCRelNode<JumpTableSDNode>(N) ||
18656 isValidPCRelNode<BlockAddressSDNode>(N));
18657}
18658
 18659 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18660/// the address flags of the load/store instruction that is to be matched.
18661unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18662 SelectionDAG &DAG) const {
18663 unsigned FlagSet = PPC::MOF_None;
18664
18665 // Compute subtarget flags.
18666 if (!Subtarget.hasP9Vector())
18667 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18668 else
18669 FlagSet |= PPC::MOF_SubtargetP9;
18670
18671 if (Subtarget.hasPrefixInstrs())
18672 FlagSet |= PPC::MOF_SubtargetP10;
18673
18674 if (Subtarget.hasSPE())
18675 FlagSet |= PPC::MOF_SubtargetSPE;
18676
18677 // Check if we have a PCRel node and return early.
18678 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18679 return FlagSet;
18680
 18681 // If the node is one of the paired load/store intrinsics, compute flags for
18682 // address computation and return early.
18683 unsigned ParentOp = Parent->getOpcode();
18684 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18685 (ParentOp == ISD::INTRINSIC_VOID))) {
18686 unsigned ID = Parent->getConstantOperandVal(1);
18687 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18688 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18689 ? Parent->getOperand(2)
18690 : Parent->getOperand(3);
18691 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18692 FlagSet |= PPC::MOF_Vector;
18693 return FlagSet;
18694 }
18695 }
18696
 18697 // Mark this as something we don't want to handle here if it is an atomic
 18698 // or a pre-increment instruction.
18699 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18700 if (LSB->isIndexed())
18701 return PPC::MOF_None;
18702
 18703 // Compute in-memory type flags. This is based on whether the access is a
 18704 // scalar integer, a floating-point value, or a vector.
18705 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18706 assert(MN && "Parent should be a MemSDNode!");
18707 EVT MemVT = MN->getMemoryVT();
18708 unsigned Size = MemVT.getSizeInBits();
18709 if (MemVT.isScalarInteger()) {
18710 assert(Size <= 128 &&
18711 "Not expecting scalar integers larger than 16 bytes!");
18712 if (Size < 32)
18713 FlagSet |= PPC::MOF_SubWordInt;
18714 else if (Size == 32)
18715 FlagSet |= PPC::MOF_WordInt;
18716 else
18717 FlagSet |= PPC::MOF_DoubleWordInt;
18718 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18719 if (Size == 128)
18720 FlagSet |= PPC::MOF_Vector;
18721 else if (Size == 256) {
18722 assert(Subtarget.pairedVectorMemops() &&
18723 "256-bit vectors are only available when paired vector memops is "
18724 "enabled!");
18725 FlagSet |= PPC::MOF_Vector;
18726 } else
18727 llvm_unreachable("Not expecting illegal vectors!");
18728 } else { // Floating point type: can be scalar, f128 or vector types.
18729 if (Size == 32 || Size == 64)
18730 FlagSet |= PPC::MOF_ScalarFloat;
18731 else if (MemVT == MVT::f128 || MemVT.isVector())
18732 FlagSet |= PPC::MOF_Vector;
18733 else
18734 llvm_unreachable("Not expecting illegal scalar floats!");
18735 }
18736
18737 // Compute flags for address computation.
18738 computeFlagsForAddressComputation(N, FlagSet, DAG);
18739
18740 // Compute type extension flags.
18741 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18742 switch (LN->getExtensionType()) {
18743 case ISD::SEXTLOAD:
18744 FlagSet |= PPC::MOF_SExt;
18745 break;
18746 case ISD::EXTLOAD:
18747 case ISD::ZEXTLOAD:
18748 FlagSet |= PPC::MOF_ZExt;
18749 break;
18750 case ISD::NON_EXTLOAD:
18751 FlagSet |= PPC::MOF_NoExt;
18752 break;
18753 }
18754 } else
18755 FlagSet |= PPC::MOF_NoExt;
18756
18757 // For integers, no extension is the same as zero extension.
18758 // We set the extension mode to zero extension so we don't have
18759 // to add separate entries in AddrModesMap for loads and stores.
18760 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18761 FlagSet |= PPC::MOF_ZExt;
18762 FlagSet &= ~PPC::MOF_NoExt;
18763 }
18764
18765 // If we don't have prefixed instructions, 34-bit constants should be
18766 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18767 bool IsNonP1034BitConst =
 18768 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
 18769 FlagSet) == PPC::MOF_RPlusSImm34;
18770 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18771 IsNonP1034BitConst)
18772 FlagSet |= PPC::MOF_NotAddNorCst;
18773
18774 return FlagSet;
18775}
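// Putting the flags together (an illustrative sketch, assuming a Power9
// subtarget without prefixed instructions): a zero-extending 32-bit load from
// (add %reg, 12) would accumulate
//   MOF_SubtargetP9 | MOF_WordInt | MOF_ZExt |
//   MOF_RPlusSImm16 | MOF_RPlusSImm16Mult4 | MOF_RPlusSImm34
// which getAddrModeForFlags() then matches against AddrModesMap.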
18776
18777/// SelectForceXFormMode - Given the specified address, force it to be
18778/// represented as an indexed [r+r] operation (an XForm instruction).
 18779 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
 18780 SDValue &Base,
18781 SelectionDAG &DAG) const {
18782
 18783 PPC::AddrMode Mode = PPC::AM_XForm;
 18784 int16_t ForceXFormImm = 0;
18785 if (provablyDisjointOr(DAG, N) &&
18786 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18787 Disp = N.getOperand(0);
18788 Base = N.getOperand(1);
18789 return Mode;
18790 }
18791
18792 // If the address is the result of an add, we will utilize the fact that the
18793 // address calculation includes an implicit add. However, we can reduce
18794 // register pressure if we do not materialize a constant just for use as the
18795 // index register. We only get rid of the add if it is not an add of a
18796 // value and a 16-bit signed constant and both have a single use.
18797 if (N.getOpcode() == ISD::ADD &&
18798 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18799 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18800 Disp = N.getOperand(0);
18801 Base = N.getOperand(1);
18802 return Mode;
18803 }
18804
18805 // Otherwise, use R0 as the base register.
18806 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18807 N.getValueType());
18808 Base = N;
18809
18810 return Mode;
18811}
18812
 18813 bool PPCTargetLowering::splitValueIntoRegisterParts(
 18814 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18815 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18816 EVT ValVT = Val.getValueType();
18817 // If we are splitting a scalar integer into f64 parts (i.e. so they
18818 // can be placed into VFRC registers), we need to zero extend and
18819 // bitcast the values. This will ensure the value is placed into a
18820 // VSR using direct moves or stack operations as needed.
18821 if (PartVT == MVT::f64 &&
18822 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18823 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18824 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18825 Parts[0] = Val;
18826 return true;
18827 }
18828 return false;
18829}
18830
18831SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18832 SelectionDAG &DAG) const {
18833 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 18834 TargetLowering::CallLoweringInfo CLI(DAG);
 18835 EVT RetVT = Op.getValueType();
18836 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18837 SDValue Callee =
18838 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18839 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
 18840 TargetLowering::ArgListTy Args;
 18841 TargetLowering::ArgListEntry Entry;
 18842 for (const SDValue &N : Op->op_values()) {
18843 EVT ArgVT = N.getValueType();
18844 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18845 Entry.Node = N;
18846 Entry.Ty = ArgTy;
18847 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
18848 Entry.IsZExt = !Entry.IsSExt;
18849 Args.push_back(Entry);
18850 }
18851
18852 SDValue InChain = DAG.getEntryNode();
18853 SDValue TCChain = InChain;
18854 const Function &F = DAG.getMachineFunction().getFunction();
18855 bool isTailCall =
18856 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18857 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18858 if (isTailCall)
18859 InChain = TCChain;
18860 CLI.setDebugLoc(SDLoc(Op))
18861 .setChain(InChain)
18862 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18863 .setTailCall(isTailCall)
18864 .setSExtResult(SignExtend)
18865 .setZExtResult(!SignExtend)
 18866 .setIsPostTypeLegalization(true);
 18867 return TLI.LowerCallTo(CLI).first;
18868}
18869
18870SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18871 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18872 SelectionDAG &DAG) const {
18873 if (Op.getValueType() == MVT::f32)
18874 return lowerToLibCall(LibCallFloatName, Op, DAG);
18875
18876 if (Op.getValueType() == MVT::f64)
18877 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18878
18879 return SDValue();
18880}
18881
18882bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18883 SDNodeFlags Flags = Op.getNode()->getFlags();
18884 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18885 Flags.hasNoNaNs() && Flags.hasNoInfs();
18886}
18887
18888bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18889 return Op.getNode()->getFlags().hasApproximateFuncs();
18890}
18891
18892bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
 18893 return getTargetMachine().Options.PPCGenScalarMASSEntries;
 18894}
18895
18896SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18897 const char *LibCallFloatName,
18898 const char *LibCallDoubleNameFinite,
18899 const char *LibCallFloatNameFinite,
18900 SDValue Op,
18901 SelectionDAG &DAG) const {
18902 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18903 return SDValue();
18904
18905 if (!isLowringToMASSFiniteSafe(Op))
18906 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18907 DAG);
18908
18909 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18910 LibCallDoubleNameFinite, Op, DAG);
18911}
18912
18913SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18914 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18915 "__xl_powf_finite", Op, DAG);
18916}
18917
18918SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18919 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18920 "__xl_sinf_finite", Op, DAG);
18921}
18922
18923SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18924 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18925 "__xl_cosf_finite", Op, DAG);
18926}
18927
18928SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18929 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18930 "__xl_logf_finite", Op, DAG);
18931}
18932
18933SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18934 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18935 "__xl_log10f_finite", Op, DAG);
18936}
18937
18938SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18939 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18940 "__xl_expf_finite", Op, DAG);
18941}
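// As an illustration of the MASS lowering above (a sketch; the IR values are
// invented): with scalar MASS conversion enabled, a call such as
//   %r = call afn double @llvm.pow.f64(double %x, double %y)
// is lowered to a call to __xl_pow, while the same call carrying the full
// afn/nnan/ninf/nsz flags is lowered to __xl_pow_finite. The f32 variants
// select the corresponding __xl_powf / __xl_powf_finite entry points.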
18942
18943// If we happen to match to an aligned D-Form, check if the Frame Index is
18944// adequately aligned. If it is not, reset the mode to match to X-Form.
18945static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18946 PPC::AddrMode &Mode) {
18947 if (!isa<FrameIndexSDNode>(N))
18948 return;
18949 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18950 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18951 Mode = PPC::AM_XForm;
18952}
18953
 18954 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18955/// compute the address flags of the node, get the optimal address mode based
18956/// on the flags, and set the Base and Disp based on the address mode.
 18957 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
 18958 SDValue N, SDValue &Disp,
18959 SDValue &Base,
18960 SelectionDAG &DAG,
18961 MaybeAlign Align) const {
18962 SDLoc DL(Parent);
18963
18964 // Compute the address flags.
18965 unsigned Flags = computeMOFlags(Parent, N, DAG);
18966
18967 // Get the optimal address mode based on the Flags.
18968 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18969
18970 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18971 // Select an X-Form load if it is not.
18972 setXFormForUnalignedFI(N, Flags, Mode);
18973
18974 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18975 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18976 assert(Subtarget.isUsingPCRelativeCalls() &&
18977 "Must be using PC-Relative calls when a valid PC-Relative node is "
18978 "present!");
18979 Mode = PPC::AM_PCRel;
18980 }
18981
18982 // Set Base and Disp accordingly depending on the address mode.
18983 switch (Mode) {
18984 case PPC::AM_DForm:
18985 case PPC::AM_DSForm:
18986 case PPC::AM_DQForm: {
18987 // This is a register plus a 16-bit immediate. The base will be the
18988 // register and the displacement will be the immediate unless it
18989 // isn't sufficiently aligned.
18990 if (Flags & PPC::MOF_RPlusSImm16) {
18991 SDValue Op0 = N.getOperand(0);
18992 SDValue Op1 = N.getOperand(1);
18993 int16_t Imm = Op1->getAsZExtVal();
18994 if (!Align || isAligned(*Align, Imm)) {
18995 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
18996 Base = Op0;
18997 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18998 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18999 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19000 }
19001 break;
19002 }
19003 }
19004 // This is a register plus the @lo relocation. The base is the register
19005 // and the displacement is the global address.
19006 else if (Flags & PPC::MOF_RPlusLo) {
19007 Disp = N.getOperand(1).getOperand(0); // The global address.
 19008 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
 19009 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
 19010 Disp.getOpcode() == ISD::TargetConstantPool ||
 19011 Disp.getOpcode() == ISD::TargetJumpTable);
 19012 Base = N.getOperand(0);
19013 break;
19014 }
19015 // This is a constant address at most 32 bits. The base will be
19016 // zero or load-immediate-shifted and the displacement will be
19017 // the low 16 bits of the address.
19018 else if (Flags & PPC::MOF_AddrIsSImm32) {
19019 auto *CN = cast<ConstantSDNode>(N);
19020 EVT CNType = CN->getValueType(0);
19021 uint64_t CNImm = CN->getZExtValue();
19022 // If this address fits entirely in a 16-bit sext immediate field, codegen
19023 // this as "d, 0".
19024 int16_t Imm;
19025 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
19026 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
19027 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19028 CNType);
19029 break;
19030 }
19031 // Handle 32-bit sext immediate with LIS + Addr mode.
19032 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
19033 (!Align || isAligned(*Align, CNImm))) {
19034 int32_t Addr = (int32_t)CNImm;
19035 // Otherwise, break this down into LIS + Disp.
19036 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
19037 Base =
19038 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
19039 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19040 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
19041 break;
19042 }
19043 }
 19044 // Otherwise, the PPC::MOF_NotAddNorCst flag is set; the load/store is non-foldable.
19045 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
19046 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
19047 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19048 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19049 } else
19050 Base = N;
19051 break;
19052 }
19053 case PPC::AM_PrefixDForm: {
19054 int64_t Imm34 = 0;
19055 unsigned Opcode = N.getOpcode();
19056 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19057 (isIntS34Immediate(N.getOperand(1), Imm34))) {
 19058 // N is an Add/OR node, and its operand is a 34-bit signed immediate.
19059 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19060 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
19061 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19062 else
19063 Base = N.getOperand(0);
19064 } else if (isIntS34Immediate(N, Imm34)) {
19065 // The address is a 34-bit signed immediate.
19066 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19067 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
19068 }
19069 break;
19070 }
19071 case PPC::AM_PCRel: {
19072 // When selecting PC-Relative instructions, "Base" is not utilized as
19073 // we select the address as [PC+imm].
19074 Disp = N;
19075 break;
19076 }
19077 case PPC::AM_None:
19078 break;
19079 default: { // By default, X-Form is always available to be selected.
19080 // When a frame index is not aligned, we also match by XForm.
19081 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
19082 Base = FI ? N : N.getOperand(1);
19083 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19084 N.getValueType())
19085 : N.getOperand(0);
19086 break;
19087 }
19088 }
19089 return Mode;
19090}
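// An illustrative selection (sketch only; register names are invented): for a
// doubleword load from (add %r3, 16), the computed flags match a DS-Form
// entry, so Disp becomes the target constant 16 and Base becomes %r3, suitable
// for an instruction such as ld. With a displacement of 17 the multiple-of-4
// flag would be missing, and the access would typically be matched as an
// X-Form (register + register) load instead.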
19091
 19092 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
 19093 bool Return,
19094 bool IsVarArg) const {
19095 switch (CC) {
19096 case CallingConv::Cold:
19097 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
19098 default:
19099 return CC_PPC64_ELF;
19100 }
19101}
19102
 19103 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
 19104 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
19105}
19106
 19107 TargetLowering::AtomicExpansionKind
 19108 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 19109 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
 19110 if (shouldInlineQuadwordAtomics() && Size == 128)
 19111 return AtomicExpansionKind::MaskedIntrinsic;
 19112
19113 switch (AI->getOperation()) {
 19114 case AtomicRMWInst::UIncWrap:
 19115 case AtomicRMWInst::UDecWrap:
 19116 case AtomicRMWInst::USubCond:
 19117 case AtomicRMWInst::USubSat:
 19118 return AtomicExpansionKind::CmpXChg;
 19119 default:
 19120 return TargetLoweringBase::shouldExpandAtomicRMWInIR(AI);
 19121 }
19122
19123 llvm_unreachable("unreachable atomicrmw operation");
19124}
19125
 19126 TargetLowering::AtomicExpansionKind
 19127 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
 19128 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
19129 if (shouldInlineQuadwordAtomics() && Size == 128)
 19130 return AtomicExpansionKind::MaskedIntrinsic;
 19131 return TargetLoweringBase::shouldExpandAtomicCmpXchgInIR(AI);
 19132}
19133
19134static Intrinsic::ID
 19135 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
 19136 switch (BinOp) {
19137 default:
19138 llvm_unreachable("Unexpected AtomicRMW BinOp");
 19139 case AtomicRMWInst::Xchg:
 19140 return Intrinsic::ppc_atomicrmw_xchg_i128;
19141 case AtomicRMWInst::Add:
19142 return Intrinsic::ppc_atomicrmw_add_i128;
19143 case AtomicRMWInst::Sub:
19144 return Intrinsic::ppc_atomicrmw_sub_i128;
19145 case AtomicRMWInst::And:
19146 return Intrinsic::ppc_atomicrmw_and_i128;
19147 case AtomicRMWInst::Or:
19148 return Intrinsic::ppc_atomicrmw_or_i128;
19149 case AtomicRMWInst::Xor:
19150 return Intrinsic::ppc_atomicrmw_xor_i128;
 19151 case AtomicRMWInst::Nand:
 19152 return Intrinsic::ppc_atomicrmw_nand_i128;
19153 }
19154}
19155
 19156 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
 19157 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19158 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19159 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19160 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19161 Type *ValTy = Incr->getType();
19162 assert(ValTy->getPrimitiveSizeInBits() == 128);
19163 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19164 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
19165 Value *IncrHi =
19166 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
19167 Value *LoHi = Builder.CreateIntrinsic(
 19168 getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
 19169 {AlignedAddr, IncrLo, IncrHi});
19170 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19171 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19172 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19173 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19174 return Builder.CreateOr(
19175 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19176}
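// Roughly, the IR produced by the hook above for an i128 "atomicrmw add" looks
// like the following (an illustrative sketch; value names are invented):
//   %incr_lo = trunc i128 %incr to i64
//   %shifted = lshr i128 %incr, 64
//   %incr_hi = trunc i128 %shifted to i64
//   %lohi = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(ptr %addr,
//                                                          i64 %incr_lo,
//                                                          i64 %incr_hi)
// after which the two returned halves are zero-extended, shifted and or'ed
// back together into the i128 result.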
19177
 19178 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
 19179 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19180 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19181 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19182 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19183 Type *ValTy = CmpVal->getType();
19184 assert(ValTy->getPrimitiveSizeInBits() == 128);
19185 Function *IntCmpXchg =
19186 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
19187 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19188 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
19189 Value *CmpHi =
19190 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
19191 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
19192 Value *NewHi =
19193 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
19194 emitLeadingFence(Builder, CI, Ord);
19195 Value *LoHi =
19196 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
19197 emitTrailingFence(Builder, CI, Ord);
19198 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19199 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19200 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19201 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19202 return Builder.CreateOr(
19203 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19204}
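// The cmpxchg expansion mirrors the atomicrmw case above (sketch only): the
// expected and new i128 values are each split into lo/hi i64 halves, the
// leading fence is emitted, @llvm.ppc.cmpxchg.i128 is called with
// (ptr, cmp_lo, cmp_hi, new_lo, new_hi), the trailing fence is emitted, and
// the two returned halves are recombined into the loaded i128 value.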
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition: IVUsers.cpp:48
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5463
bool isDenormal() const
Definition: APFloat.h:1446
APInt bitcastToAPInt() const
Definition: APFloat.h:1351
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
APInt abs() const
Get the absolute value.
Definition: APInt.h:1773
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1700
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
BinOp getOperation() const
Definition: Instructions.h:805
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:396
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:220
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:893
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1341
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1887
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1399
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1261
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1334
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1267
unsigned arg_size() const
Definition: InstrTypes.h:1284
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:873
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:851
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:843
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:719
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:373
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:716
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:365
arg_iterator arg_begin()
Definition: Function.h:880
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
size_t arg_size() const
Definition: Function.h:913
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:595
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:264
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:268
bool hasHiddenVisibility() const
Definition: GlobalValue.h:251
StringRef getSection() const
Definition: Globals.cpp:189
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:632
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:130
bool hasComdat() const
Definition: GlobalValue.h:242
Type * getValueType() const
Definition: GlobalValue.h:297
bool hasProtectedVisibility() const
Definition: GlobalValue.h:252
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1480
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1459
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:176
bool isUnordered() const
Definition: Instructions.h:249
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1073
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
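The builder methods above are normally chained off BuildMI; a hedged sketch in which the opcode, registers and flag are placeholders rather than anything emitted by this file:
  // Assumes MachineBasicBlock &MBB, an insertion iterator MBBI, a DebugLoc DL
  // and the target's TargetInstrInfo *TII are in scope.
  BuildMI(MBB, MBBI, DL, TII->get(PPC::ADDI), DestReg)
      .addReg(SrcReg)
      .addImm(16)
      .setMIFlag(MachineInstr::FrameSetup);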
Representation of each machine instruction.
Definition: MachineInstr.h:71
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
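These pieces usually appear together when an incoming argument arrives in a physical register; a rough sketch of the pattern, where the register and register class merely stand in for whatever the calling convention assigned:
  // Assumes MachineFunction &MF, SelectionDAG &DAG, SDLoc dl and SDValue Chain.
  Register VReg = MF.addLiveIn(PPC::X3, &PPC::G8RCRegClass);
  SDValue ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);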
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register (64-bit SVR4 ABI only).
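A hedged sketch of how such ABI offsets tend to be consumed when materializing the saved return address; the control flow and types are illustrative only:
  // Assumes MachineFunction &MF, SelectionDAG &DAG, SDLoc dl, a PPCFrameLowering
  // pointer FL and a bool isPPC64.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  int FI = MFI.CreateFixedObject(isPPC64 ? 8 : 4, FL->getReturnSaveOffset(),
                                 /*IsImmutable=*/false);
  SDValue FIN = DAG.getFrameIndex(FI, isPPC64 ? MVT::i64 : MVT::i32);
  SDValue RetAddr = DAG.getLoad(isPPC64 ? MVT::i64 : MVT::i32, dl,
                                DAG.getEntryNode(), FIN, MachinePointerInfo());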
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:304
bool is32BitELFABI() const
Definition: PPCSubtarget.h:224
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:266
MVT getScalarIntVT() const
Definition: PPCSubtarget.h:253
bool isAIXABI() const
Definition: PPCSubtarget.h:219
bool useSoftFloat() const
Definition: PPCSubtarget.h:179
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:147
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:207
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:260
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:278
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:150
bool isSVR4ABI() const
Definition: PPCSubtarget.h:220
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:139
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:211
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
bool isTargetLinux() const
Definition: PPCSubtarget.h:217
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:284
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:296
bool is64BitELFABI() const
Definition: PPCSubtarget.h:223
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:160
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:302
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:157
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:272
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Returns true if the specified address can be represented PC-relative, i.e. as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
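In a DAG combine these accessors are typically used together to pattern-match a node before rewriting it; a minimal, hypothetical guard:
  // Hypothetical: match (shl X, 2) where the shift amount is a ConstantSDNode
  // and the node has a single use.
  if (N->getOpcode() == ISD::SHL && N->getNumOperands() == 2 &&
      isa<ConstantSDNode>(N->getOperand(1)) &&
      N->getConstantOperandVal(1) == 2 && N->hasOneUse()) {
    SDValue X = N->getOperand(0);
    // ... build the replacement from X ...
  }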
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:499
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:802
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
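A small sketch of building a comparison feeding a select, assuming DAG, dl and two i64 operands LHS/RHS are in scope (MVT::i1 is used for brevity; production code derives the result type from getSetCCResultType):
  SDValue Cond = DAG.getSetCC(dl, MVT::i1, LHS, RHS, ISD::SETEQ);
  SDValue Zero = DAG.getConstant(0, dl, MVT::i64);
  SDValue One  = DAG.getConstant(1, dl, MVT::i64);
  SDValue Res  = DAG.getSelect(dl, MVT::i64, Cond, One, Zero);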
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:458
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:761
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:857
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:828
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:756
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:713
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
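Load and store creation mirror each other; a minimal sketch assuming Chain, Ptr and dl are already in scope:
  // Load an i32, add one, and store the result back through the same pointer.
  SDValue Val = DAG.getLoad(MVT::i32, dl, Chain, Ptr, MachinePointerInfo());
  SDValue Inc = DAG.getNode(ISD::ADD, dl, MVT::i32, Val,
                            DAG.getConstant(1, dl, MVT::i32));
  SDValue St  = DAG.getStore(Val.getValue(1), dl, Inc, Ptr, MachinePointerInfo());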
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:701
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:797
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
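Addressing-mode selection commonly pairs this check with a direct read of the constant; a hedged sketch in which Base and Disp stand for the out-parameters of an enclosing selection routine:
  // Hypothetical D-form check: base register plus a signed 16-bit displacement.
  if (DAG.isBaseWithConstantOffset(N)) {
    int64_t Imm = cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
    if (isInt<16>(Imm)) {
      Base = N.getOperand(0);
      Disp = DAG.getSignedTargetConstant(Imm, SDLoc(N), MVT::i32);
    }
  }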
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:874
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
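A short sketch of the known-bits queries, assuming a 64-bit SDValue Op and DAG are in scope:
  // Is the upper half of Op provably zero? If so it behaves like a
  // zero-extended 32-bit value.
  if (DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32))) {
    // ... safe to treat Op as a narrow value ...
  }
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned SignBits = DAG.ComputeNumSignBits(Op);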
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
void clear()
Definition: SmallSet.h:204
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
Class to represent struct types.
Definition: DerivedTypes.h:218
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
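These hooks are called from a target's TargetLowering constructor; a representative, purely illustrative fragment:
  // Illustrative constructor-time configuration.
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);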
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
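Extension-load and truncating-store legality is configured the same way, again from the constructor; an illustrative fragment (the particular types are examples, not a statement of what this target does):
  // Promote i1 sign-extending loads, expand f64->f32 truncating stores, and
  // promote a vector op to an equally sized vector type.
  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setOperationAction(ISD::AND, MVT::v16i8, Promote);
  AddPromotedToType(ISD::AND, MVT::v16i8, MVT::v4i32);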
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:310
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:255
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:64
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ TargetConstantPool
Definition: ISDOpcodes.h:174
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1270
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ TargetExternalSymbol
Definition: ISDOpcodes.h:175
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1118
@ TargetJumpTable
Definition: ISDOpcodes.h:173
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:170
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:931
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:439
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:440
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1168
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1165
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1211
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1398
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:171
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1498
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1643
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1559
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1649
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
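For illustration only, a minimal sketch (not code from this file) of how these ISD load predicates are typically combined in a DAG combine; the helper name isSimpleSExtLoad and the incoming SDNode *N are assumptions.
// Accept only plain, unindexed, sign-extending loads. The predicates are
// safe to call on any SDNode; they return false when N is not a LoadSDNode.
static bool isSimpleSExtLoad(const SDNode *N) {
  // Unindexed: no pre/post-increment address update to preserve.
  // SEXTLoad: the loaded value is sign-extended to the result type.
  return ISD::isUNINDEXEDLoad(N) && ISD::isSEXTLoad(N);
}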
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:732
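For illustration only, a minimal sketch of Intrinsic::getOrInsertDeclaration; the emitCtlz helper is an assumption, and llvm.ctlz is used simply as an example of an overloaded intrinsic whose concrete type must be supplied through Tys.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Get (or create) the i64 overload of llvm.ctlz in module M and call it.
static llvm::Value *emitCtlz(llvm::IRBuilder<> &B, llvm::Module *M,
                             llvm::Value *X) {
  llvm::Function *Ctlz = llvm::Intrinsic::getOrInsertDeclaration(
      M, llvm::Intrinsic::ctlz, {B.getInt64Ty()});
  // Second operand of llvm.ctlz: is_zero_poison = false.
  return B.CreateCall(Ctlz, {X, B.getFalse()});
}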
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:96
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:144
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:192
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:195
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:170
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:201
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:152
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:119
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:148
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:198
@ MO_TPREL_HA
Definition: PPC.h:177
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:111
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:186
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:138
@ MO_TPREL_LO
Definition: PPC.h:176
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:173
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:164
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:189
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:133
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:158
@ MO_HA
Definition: PPC.h:174
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:115
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load from memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ SETBC
SETBC - The ISA 3.1 (P10) SETBC instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ SETBCR
SETBCR - The ISA 3.1 (P10) SETBCR instruction.
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load from memory, using instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load from memory that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:65
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
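For illustration only, a minimal sketch of the BuildMI builder interface; the emitLoadZero helper is an assumption, and MBB, MI, DL and TII are assumed to come from a surrounding PPC machine pass with the usual target headers available.
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

// li r3, 0 -- destination register first, remaining operands added via add*().
static void emitLoadZero(llvm::MachineBasicBlock &MBB,
                         llvm::MachineBasicBlock::iterator MI,
                         const llvm::DebugLoc &DL,
                         const llvm::TargetInstrInfo *TII) {
  llvm::BuildMI(MBB, MI, DL, TII->get(PPC::LI), PPC::R3).addImm(0);
}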
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
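For illustration only, a sketch (helper name assumed) of how isNullConstant and peekThroughBitcasts compose with ISD::isBuildVectorAllZeros, listed earlier, when testing for a zero operand.
// Strip any wrapping ISD::BITCAST nodes, then test for a scalar zero or an
// all-zero BUILD_VECTOR.
static bool isZeroIgnoringBitcasts(llvm::SDValue Op) {
  llvm::SDValue Src = llvm::peekThroughBitcasts(Op);
  return llvm::isNullConstant(Src) ||
         llvm::ISD::isBuildVectorAllZeros(Src.getNode());
}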
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:297
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0s from the least significant bit towards the most significant bit, stopping at the first 1.
Definition: bit.h:215
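A small, self-contained example of isPowerOf2_64 together with countr_zero (listed just above): rewriting an unsigned divide by a power-of-two constant as a shift. The udivByConst name is an assumption.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

uint64_t udivByConst(uint64_t X, uint64_t C) {
  assert(C != 0 && "divide by zero");
  if (llvm::isPowerOf2_64(C))          // e.g. C == 8
    return X >> llvm::countr_zero(C);  // countr_zero(8) == 3, so X >> 3
  return X / C;
}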
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:155
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:160
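A small example of Hi_32/Lo_32 (the splitImm64 name is an assumption): splitting a 64-bit value into its two 32-bit halves.
#include "llvm/Support/MathExtras.h"
#include <cstdint>

void splitImm64(uint64_t Imm, uint32_t &HiHalf, uint32_t &LoHalf) {
  HiHalf = llvm::Hi_32(Imm); // bits 63..32
  LoHalf = llvm::Lo_32(Imm); // bits 31..0
  // For Imm == 0x123456789ABCDEF0: HiHalf == 0x12345678, LoHalf == 0x9ABCDEF0.
}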
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
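A small example combining alignTo with Align and isAligned (both listed earlier in this section); paddedSize is an assumed name.
#include "llvm/Support/Alignment.h"
#include <cassert>
#include <cstdint>

uint64_t paddedSize(uint64_t SizeInBytes, llvm::Align A) {
  // e.g. alignTo(20, Align(16)) == 32.
  uint64_t Padded = llvm::alignTo(SizeInBytes, A);
  assert(llvm::isAligned(A, Padded) && "result is a multiple of A");
  return Padded;
}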
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:564
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:582
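A small example of SignExtend32/SignExtend64: interpreting a raw 16-bit field (such as a D-form displacement) as a signed value; decodeDisp16 is an assumed name.
#include "llvm/Support/MathExtras.h"
#include <cstdint>

int64_t decodeDisp16(uint32_t RawField) {
  // The template parameter is the width of the encoded field.
  (void)llvm::SignExtend32<16>(RawField);  // 0xFFFF -> -1 as int32_t
  return llvm::SignExtend64<16>(RawField); // 0xFFFF -> -1 as int64_t
}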
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
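For a feel of the contiguous-run test, the generic MathExtras helper isShiftedMask_32 checks the same non-wrapping property; the PPC helper above additionally reports the run's bounds through MB and ME. This is illustrative only, not a drop-in replacement.
#include "llvm/Support/MathExtras.h"

static_assert(llvm::isShiftedMask_32(0x00FFFF00u),  // one contiguous run of 1s
              "single run");
static_assert(!llvm::isShiftedMask_32(0x00FF00FFu), // two separate runs
              "not a single run");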
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:257
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:260
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:306
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
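A small, self-contained example of the EVT queries listed above (evtDemo is an assumed name): building a v4i32 EVT and inspecting it.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"

void evtDemo() {
  llvm::LLVMContext Ctx;
  llvm::EVT I32 = llvm::EVT::getIntegerVT(Ctx, 32);
  llvm::EVT V4I32 = llvm::EVT::getVectorVT(Ctx, I32, 4);
  (void)V4I32.isVector();             // true
  (void)V4I32.getVectorNumElements(); // 4
  (void)V4I32.getSizeInBits();        // TypeSize of 128 bits
  (void)V4I32.getScalarType();        // i32
}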
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:53
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:59
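A small, self-contained example of the KnownBits members above (knownBitsDemo is an assumed name): an 8-bit value whose low nibble is known to be 0101 and whose high nibble is initially unknown.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"

void knownBitsDemo() {
  llvm::KnownBits Known(8);
  Known.One  = llvm::APInt(8, 0x05);  // bits known to be one
  Known.Zero = llvm::APInt(8, 0x0A);  // bits known to be zero
  (void)Known.isConstant();           // false: the high nibble is still unknown
  Known.Zero |= llvm::APInt(8, 0xF0); // learn that the high nibble is zero
  if (Known.isConstant())
    (void)Known.getConstant();        // APInt(8, 0x05)
  Known.resetAll();                   // back to "nothing known"
}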
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)