1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/APSInt.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/ADT/StringRef.h"
57#include "llvm/IR/CallingConv.h"
58#include "llvm/IR/Constant.h"
59#include "llvm/IR/Constants.h"
60#include "llvm/IR/DataLayout.h"
61#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/Function.h"
64#include "llvm/IR/GlobalValue.h"
65#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Intrinsics.h"
68#include "llvm/IR/IntrinsicsPowerPC.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/Value.h"
73#include "llvm/MC/MCContext.h"
74#include "llvm/MC/MCExpr.h"
83#include "llvm/Support/Debug.h"
85#include "llvm/Support/Format.h"
91#include <algorithm>
92#include <cassert>
93#include <cstdint>
94#include <iterator>
95#include <list>
96#include <optional>
97#include <utility>
98#include <vector>
99
100using namespace llvm;
101
102#define DEBUG_TYPE "ppc-lowering"
103
105 "disable-p10-store-forward",
106 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
107 cl::init(false));
108
109static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
110cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
113cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
116cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
117
118static cl::opt<bool> DisableSCO("disable-ppc-sco",
119cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
120
121static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
122cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
123
124static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
125cl::desc("use absolute jump tables on ppc"), cl::Hidden);
126
127static cl::opt<bool>
128 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
129 cl::desc("disable vector permute decomposition"),
130 cl::init(true), cl::Hidden);
131
133 "disable-auto-paired-vec-st",
134 cl::desc("disable automatically generated 32byte paired vector stores"),
135 cl::init(true), cl::Hidden);
136
138 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
139 cl::desc("Set minimum number of entries to use a jump table on PPC"));
140
142 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
143 cl::desc("max depth when checking alias info in GatherAllAliases()"));
144
146 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
147 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
148 "function to use initial-exec"));
149
150STATISTIC(NumTailCalls, "Number of tail calls");
151STATISTIC(NumSiblingCalls, "Number of sibling calls");
152STATISTIC(ShufflesHandledWithVPERM,
153 "Number of shuffles lowered to a VPERM or XXPERM");
155STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocations probed");
155
156static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
157
158static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
159
160// A faster local-[exec|dynamic] TLS access sequence (enabled with the
161// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
162// variables; consistent with the IBM XL compiler, we apply a max size of
163// slightly under 32KB.
165
166// FIXME: Remove this once the bug has been fixed!
168
170 const PPCSubtarget &STI)
171 : TargetLowering(TM), Subtarget(STI) {
172 // Initialize map that relates the PPC addressing modes to the computed flags
173 // of a load/store instruction. The map is used to determine the optimal
174 // addressing mode when selecting loads and stores.
175 initializeAddrModeMap();
176 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
177 // arguments are at least 4/8 bytes aligned.
178 bool isPPC64 = Subtarget.isPPC64();
179 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
180 const MVT RegVT = Subtarget.getScalarIntVT();
181
182 // Set up the register classes.
183 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
184 if (!useSoftFloat()) {
185 if (hasSPE()) {
186 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
187 // EFPU2 APU only supports f32
188 if (!Subtarget.hasEFPU2())
189 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
190 } else {
191 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
192 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
193 }
194 }
195
198
199 // PowerPC uses addo_carry, subo_carry to propagate carry.
202
203 // On P10, the default lowering generates better code using the
204 // setbc instruction.
205 if (!Subtarget.hasP10Vector()) {
207 if (isPPC64)
209 }
210
211 // Match BITREVERSE to customized fast code sequence in the td file.
214
215 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
217
218 // Custom lower inline assembly to check for special registers.
221
222 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
223 for (MVT VT : MVT::integer_valuetypes()) {
226 }
227
228 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
230
231 if (Subtarget.isISA3_0()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
235 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
236 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
237 } else {
238 // No extending loads from f16 or HW conversions back and forth.
239 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
241 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
244 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
247 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
248 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
249 }
250
251 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
252
253 // PowerPC has pre-inc loads and stores.
264 if (!Subtarget.hasSPE()) {
269 }
270
271 if (Subtarget.useCRBits()) {
273
274 if (isPPC64 || Subtarget.hasFPCVT()) {
279
281 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
283 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
284
289
291 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
293 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
294 } else {
299 }
300
301 // PowerPC does not support direct load/store of condition registers.
304
305 // FIXME: Remove this once the ANDI glue bug is fixed:
306 if (ANDIGlueBug)
308
309 for (MVT VT : MVT::integer_valuetypes()) {
312 setTruncStoreAction(VT, MVT::i1, Expand);
313 }
314
315 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
316 }
317
318 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
319 // PPC (the libcall is not available).
324
325 // We do not currently implement these libm ops for PowerPC.
326 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
327 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
328 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
329 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
331 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
332
333 // PowerPC has no SREM/UREM instructions unless we are on P9
334 // On P9 we may use a hardware instruction to compute the remainder.
335 // When the result of both the remainder and the division is required it is
336 // more efficient to compute the remainder from the result of the division
337 // rather than use the remainder instruction. The instructions are legalized
338 // directly because the DivRemPairsPass performs the transformation at the IR
339 // level.
340 if (Subtarget.isISA3_0()) {
345 } else {
350 }
351
352 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
361
362 // Handle constrained floating-point operations of scalar.
363 // TODO: Handle SPE specific operations.
369
374
375 if (!Subtarget.hasSPE()) {
378 }
379
380 if (Subtarget.hasVSX()) {
383 }
384
385 if (Subtarget.hasFSQRT()) {
388 }
389
390 if (Subtarget.hasFPRND()) {
395
400 }
401
402 // We don't support sin/cos/sqrt/fmod/pow
413
414 // MASS transformation for LLVM intrinsics with replicating fast-math flag,
415 // to be consistent with the PPCGenScalarMASSEntries pass.
416 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
429 }
430
431 if (Subtarget.hasSPE()) {
434 } else {
435 setOperationAction(ISD::FMA , MVT::f64, Legal);
436 setOperationAction(ISD::FMA , MVT::f32, Legal);
439 }
440
441 if (Subtarget.hasSPE())
442 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
443
444 // If we're enabling GP optimizations, use hardware square root
445 if (!Subtarget.hasFSQRT() &&
446 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
447 Subtarget.hasFRE()))
449
450 if (!Subtarget.hasFSQRT() &&
451 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
452 Subtarget.hasFRES()))
454
455 if (Subtarget.hasFCPSGN()) {
458 } else {
461 }
462
463 if (Subtarget.hasFPRND()) {
468
473 }
474
475 // Prior to P10, PowerPC does not have BSWAP, but we can use the vector BSWAP
476 // instruction xxbrd to speed up scalar BSWAP64.
477 if (Subtarget.isISA3_1()) {
480 } else {
483 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
484 }
485
486 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
487 if (Subtarget.isISA3_0()) {
488 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
489 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
490 } else {
491 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
492 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
493 }
494
495 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
498 } else {
501 }
502
503 // PowerPC does not have ROTR
506
507 if (!Subtarget.useCRBits()) {
508 // PowerPC does not have Select
513 }
514
515 // PowerPC wants to turn select_cc of FP into fsel when possible.
518
519 // PowerPC wants to optimize integer setcc a bit
520 if (!Subtarget.useCRBits())
522
523 if (Subtarget.hasFPU()) {
527
531 }
532
533 // PowerPC does not have BRCOND, which requires SetCC.
534 if (!Subtarget.useCRBits())
536
538
539 if (Subtarget.hasSPE()) {
540 // SPE has built-in conversions
547
548 // SPE supports signaling compare of f32/f64.
551 } else {
552 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
555
556 // PowerPC does not have [U|S]INT_TO_FP
561 }
562
563 if (Subtarget.hasDirectMove() && isPPC64) {
568 if (TM.Options.UnsafeFPMath) {
577 }
578 } else {
583 }
584
585 // We cannot sextinreg(i1). Expand to shifts.
587
588 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
589 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
590 // support continuation, user-level threading, etc. As a result, no
591 // other SjLj exception interfaces are implemented, so please don't build
592 // your own exception handling based on them.
593 // LLVM/Clang supports zero-cost DWARF exception handling.
596
597 // We want to legalize GlobalAddress and ConstantPool nodes into the
598 // appropriate instructions to materialize the address.
609
610 // TRAP is legal.
611 setOperationAction(ISD::TRAP, MVT::Other, Legal);
612
613 // TRAMPOLINE is custom lowered.
616
617 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
619
620 if (Subtarget.is64BitELFABI()) {
621 // VAARG always uses double-word chunks, so promote anything smaller.
623 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
625 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
627 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
629 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
631 } else if (Subtarget.is32BitELFABI()) {
632 // VAARG is custom lowered with the 32-bit SVR4 ABI.
635 } else
637
638 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
639 if (Subtarget.is32BitELFABI())
641 else
643
644 // Use the default implementation.
645 setOperationAction(ISD::VAEND , MVT::Other, Expand);
654
655 // We want to custom lower some of our intrinsics.
661
662 // To handle counter-based loop conditions.
664
669
670 // Comparisons that require checking two conditions.
671 if (Subtarget.hasSPE()) {
676 }
689
692
693 if (Subtarget.has64BitSupport()) {
694 // They also have instructions for converting between i64 and fp.
703 // This is just the low 32 bits of a (signed) fp->i64 conversion.
704 // We cannot do this with Promote because i64 is not a legal type.
707
708 if (Subtarget.hasLFIWAX() || isPPC64) {
711 }
712 } else {
713 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
714 if (Subtarget.hasSPE()) {
717 } else {
720 }
721 }
722
723 // With the instructions enabled under FPCVT, we can do everything.
724 if (Subtarget.hasFPCVT()) {
725 if (Subtarget.has64BitSupport()) {
734 }
735
744 }
745
746 if (Subtarget.use64BitRegs()) {
747 // 64-bit PowerPC implementations can support i64 types directly
748 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
749 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
751 // 64-bit PowerPC wants to expand i128 shifts itself.
755 } else {
756 // 32-bit PowerPC wants to expand i64 shifts itself.
760 }
761
762 // PowerPC has better expansions for funnel shifts than the generic
763 // TargetLowering::expandFunnelShift.
764 if (Subtarget.has64BitSupport()) {
767 }
770
771 if (Subtarget.hasVSX()) {
778 }
779
780 if (Subtarget.hasAltivec()) {
781 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
786 }
787 // First set operation action for all vector types to expand. Then we
788 // will selectively turn on ones that can be effectively codegen'd.
790 // add/sub are legal for all supported vector VT's.
793
794 // For v2i64, these are only valid with P8Vector. This is corrected after
795 // the loop.
796 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
801 }
802 else {
807 }
808
809 if (Subtarget.hasVSX()) {
812 }
813
814 // Vector instructions introduced in P8
815 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
818 }
819 else {
822 }
823
824 // Vector instructions introduced in P9
825 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
827 else
829
830 // We promote all shuffles to v16i8.
832 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
833
834 // We promote all non-typed operations to v4i32.
836 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
838 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
840 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
842 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
844 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
847 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
849 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
850
851 // No other operations are legal.
890
891 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
892 setTruncStoreAction(VT, InnerVT, Expand);
895 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
896 }
897 }
899 if (!Subtarget.hasP8Vector()) {
900 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
901 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
902 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
903 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
904 }
905
906 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
907 // with merges, splats, etc.
909
910 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
911 // are cheap, so handle them before they get expanded to scalars.
917
918 setOperationAction(ISD::AND , MVT::v4i32, Legal);
919 setOperationAction(ISD::OR , MVT::v4i32, Legal);
920 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
921 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
923 Subtarget.useCRBits() ? Legal : Expand);
924 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
934 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
937
938 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
939 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
940 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
941 if (Subtarget.hasAltivec())
942 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
944 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
945 if (Subtarget.hasP8Altivec())
946 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
947
948 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
949 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
950 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
951 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
952
953 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
954 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
955
956 if (Subtarget.hasVSX()) {
957 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
958 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
960 }
961
962 if (Subtarget.hasP8Altivec())
963 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
964 else
965 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
966
967 if (Subtarget.isISA3_1()) {
968 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
969 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
970 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
971 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
972 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
973 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
974 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
975 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
976 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
977 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
978 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
979 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
980 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
981 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
982 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
983 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
984 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
985 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
986 }
987
988 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
989 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
990
993 // LE is P8+/64-bit so direct moves are supported and these operations
994 // are legal. The custom transformation requires 64-bit since we need a
995 // pair of stores that will cover a 128-bit load for P10.
996 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1000 }
1001
1006
1007 // Altivec does not contain unordered floating-point compare instructions
1008 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1009 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1010 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1011 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1012
1013 if (Subtarget.hasVSX()) {
1016 if (Subtarget.hasP8Vector()) {
1019 }
1020 if (Subtarget.hasDirectMove() && isPPC64) {
1029 }
1031
1032 // The nearbyint variants are not allowed to raise the inexact exception
1033 // so we can only code-gen them with unsafe math.
1034 if (TM.Options.UnsafeFPMath) {
1037 }
1038
1039 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1041 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1044 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1047
1049 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1053
1054 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1055 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1056
1057 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1058 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1059
1060 // Share the Altivec comparison restrictions.
1061 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1062 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1063 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1064 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1065
1066 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1067 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1068
1070
1071 if (Subtarget.hasP8Vector())
1072 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1073
1074 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1075
1076 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1077 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1078 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1079
1080 if (Subtarget.hasP8Altivec()) {
1081 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1082 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1083 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1084
1085 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1086 // SRL, but not for SRA because of the instructions available:
1087 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1088 // doing
1089 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1090 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1091 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1092
1093 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1094 }
1095 else {
1096 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1097 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1098 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1099
1100 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1101
1102 // VSX v2i64 only supports non-arithmetic operations.
1103 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1104 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1105 }
1106
1107 if (Subtarget.isISA3_1())
1108 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1109 else
1110 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1111
1112 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1113 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1115 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1116
1118
1127
1128 // Custom handling for partial vectors of integers converted to
1129 // floating point. We already have optimal handling for v2i32 through
1130 // the DAG combine, so those aren't necessary.
1147
1148 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1149 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1150 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1151 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1154
1157
1158 // Handle constrained floating-point operations of vector.
1159 // The predicate is `hasVSX` because Altivec instructions do not raise
1160 // exceptions but VSX vector instructions do.
1174
1188
1189 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1190 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1191
1192 for (MVT FPT : MVT::fp_valuetypes())
1193 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1194
1195 // Expand the SELECT to SELECT_CC
1197
1198 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1199 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1200
1201 // No implementation for these ops on PowerPC.
1203 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1204 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1205 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1207 setOperationAction(ISD::FREM, MVT::f128, Expand);
1208 }
1209
1210 if (Subtarget.hasP8Altivec()) {
1211 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1212 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1213 }
1214
1215 if (Subtarget.hasP9Vector()) {
1218
1219 // Test data class instructions store results in CR bits.
1220 if (Subtarget.useCRBits()) {
1225 }
1226
1227 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1228 // SRL, but not for SRA because of the instructions available:
1229 // VS{RL} and VS{RL}O.
1230 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1231 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1232 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1233
1234 setOperationAction(ISD::FADD, MVT::f128, Legal);
1235 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1236 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1237 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1239
1240 setOperationAction(ISD::FMA, MVT::f128, Legal);
1247
1249 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1251 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1254
1258
1259 // Handle constrained floating-point operations of fp128
1276 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1277 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1278 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1279 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1280 } else if (Subtarget.hasVSX()) {
1283
1284 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1285 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1286
1287 // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1288 // fp_to_uint and int_to_fp.
1291
1292 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1293 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1294 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1295 setOperationAction(ISD::FABS, MVT::f128, Expand);
1297 setOperationAction(ISD::FMA, MVT::f128, Expand);
1299
1300 // Expand the fp_extend if the target type is fp128.
1303
1304 // Expand the fp_round if the source type is fp128.
1305 for (MVT VT : {MVT::f32, MVT::f64}) {
1308 }
1309
1314
1315 // Lower the following f128 select_cc pattern:
1316 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1318
1319 // We need to handle f128 SELECT_CC with integer result type.
1321 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1322 }
1323
1324 if (Subtarget.hasP9Altivec()) {
1325 if (Subtarget.isISA3_1()) {
1330 } else {
1333 }
1341
1342 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1343 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1344 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1345 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1346 }
1347
1348 if (Subtarget.hasP10Vector()) {
1350 }
1351 }
1352
1353 if (Subtarget.pairedVectorMemops()) {
1354 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1355 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1356 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1357 }
1358 if (Subtarget.hasMMA()) {
1359 if (Subtarget.isISAFuture()) {
1360 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1361 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1362 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1363 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1364 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1365 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1366 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1367 } else {
1368 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1369 }
1370 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1371 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1373 }
1374
1375 if (Subtarget.has64BitSupport())
1377
1378 if (Subtarget.isISA3_1())
1379 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1380
1381 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1382
1383 if (!isPPC64) {
1386 }
1387
1392 }
1393
1395
1396 if (Subtarget.hasAltivec()) {
1397 // Altivec instructions set fields to all zeros or all ones.
1399 }
1400
1403 else if (isPPC64)
1405 else
1407
1408 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1409
1410 // We have target-specific dag combine patterns for the following nodes:
1413 if (Subtarget.hasFPCVT())
1416 if (Subtarget.useCRBits())
1420
1422
1424
1425 if (Subtarget.useCRBits()) {
1427 }
1428
1429 // With 32 condition bits, we don't need to sink (and duplicate) compares
1430 // aggressively in CodeGenPrep.
1431 if (Subtarget.useCRBits()) {
1433 }
1434
1435 // TODO: The default entry number is set to 64. This stops most jump table
1436 // generation on PPC. But it is good for current PPC hardware because the
1437 // indirect branch via mtctr to the jump table may lead to bad branch prediction.
1438 // Re-evaluate this value on future hardware that can do better with mtctr.
1440
1442 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1443
1444 auto CPUDirective = Subtarget.getCPUDirective();
1445 switch (CPUDirective) {
1446 default: break;
1447 case PPC::DIR_970:
1448 case PPC::DIR_A2:
1449 case PPC::DIR_E500:
1450 case PPC::DIR_E500mc:
1451 case PPC::DIR_E5500:
1452 case PPC::DIR_PWR4:
1453 case PPC::DIR_PWR5:
1454 case PPC::DIR_PWR5X:
1455 case PPC::DIR_PWR6:
1456 case PPC::DIR_PWR6X:
1457 case PPC::DIR_PWR7:
1458 case PPC::DIR_PWR8:
1459 case PPC::DIR_PWR9:
1460 case PPC::DIR_PWR10:
1461 case PPC::DIR_PWR11:
1465 break;
1466 }
1467
1468 if (Subtarget.enableMachineScheduler())
1470 else
1472
1474
1475 // The Freescale cores do better with aggressive inlining of memcpy and
1476 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1477 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1478 MaxStoresPerMemset = 32;
1480 MaxStoresPerMemcpy = 32;
1484 } else if (CPUDirective == PPC::DIR_A2) {
1485 // The A2 also benefits from (very) aggressive inlining of memcpy and
1486 // friends. The overhead of the function call, even when warm, can be
1487 // over one hundred cycles.
1488 MaxStoresPerMemset = 128;
1489 MaxStoresPerMemcpy = 128;
1490 MaxStoresPerMemmove = 128;
1491 MaxLoadsPerMemcmp = 128;
1492 } else {
1495 }
1496
1497 // Enable generation of STXVP instructions by default for mcpu=future.
1498 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1500 DisableAutoPairedVecSt = false;
1501
1502 IsStrictFPEnabled = true;
1503
1504 // Let the subtarget (CPU) decide if a predictable select is more expensive
1505 // than the corresponding branch. This information is used in CGP to decide
1506 // when to convert selects into branches.
1508
1510}
1511
1512// *********************************** NOTE ************************************
1513// For selecting load and store instructions, the addressing modes are defined
1514// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1515// patterns to match the load and store instructions.
1516//
1517// The TD definitions for the addressing modes correspond to their respective
1518// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1519// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1520// address mode flags of a particular node. Afterwards, the computed address
1521// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1522// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1523// accordingly, based on the preferred addressing mode.
1524//
1525// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1526// MemOpFlags contains all the possible flags that can be used to compute the
1527// optimal addressing mode for load and store instructions.
1528// AddrMode contains all the possible load and store addressing modes available
1529// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1530//
1531// When adding new load and store instructions, it is possible that new address
1532// flags may need to be added into MemOpFlags, and a new addressing mode will
1533// need to be added to AddrMode. An entry of the new addressing mode (consisting
1534// of the minimal and main distinguishing address flags for the new load/store
1535// instructions) will need to be added into initializeAddrModeMap() below.
1536// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1537// need to be updated to account for selecting the optimal addressing mode.
1538// *****************************************************************************
1539/// Initialize the map that relates the different addressing modes of the load
1540/// and store instructions to a set of flags. This ensures the load/store
1541/// instruction is correctly matched during instruction selection.
1542void PPCTargetLowering::initializeAddrModeMap() {
1543 AddrModesMap[PPC::AM_DForm] = {
1544 // LWZ, STW
1549 // LBZ, LHZ, STB, STH
1554 // LHA
1559 // LFS, LFD, STFS, STFD
1564 };
1565 AddrModesMap[PPC::AM_DSForm] = {
1566 // LWA
1570 // LD, STD
1574 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1578 };
1579 AddrModesMap[PPC::AM_DQForm] = {
1580 // LXV, STXV
1584 };
1585 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1587 // TODO: Add mapping for quadword load/store.
1588}
1589
1590/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1591/// the desired ByVal argument alignment.
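/// For example, with Altivec available (MaxMaxAlign == 16), a struct that
/// contains a 128-bit vector element yields a maximum ByVal alignment of 16
/// bytes.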
1592static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1593 if (MaxAlign == MaxMaxAlign)
1594 return;
1595 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1596 if (MaxMaxAlign >= 32 &&
1597 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1598 MaxAlign = Align(32);
1599 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1600 MaxAlign < 16)
1601 MaxAlign = Align(16);
1602 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1603 Align EltAlign;
1604 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1605 if (EltAlign > MaxAlign)
1606 MaxAlign = EltAlign;
1607 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1608 for (auto *EltTy : STy->elements()) {
1609 Align EltAlign;
1610 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1611 if (EltAlign > MaxAlign)
1612 MaxAlign = EltAlign;
1613 if (MaxAlign == MaxMaxAlign)
1614 break;
1615 }
1616 }
1617}
1618
1619/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1620/// function arguments in the caller parameter area.
1622 const DataLayout &DL) const {
1623 // 16-byte and wider vectors are passed on a 16-byte boundary.
1624 // The rest is on an 8-byte boundary on PPC64 and 4-byte on PPC32.
1625 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1626 if (Subtarget.hasAltivec())
1627 getMaxByValAlign(Ty, Alignment, Align(16));
1628 return Alignment;
1629}
1630
1632 return Subtarget.useSoftFloat();
1633}
1634
1636 return Subtarget.hasSPE();
1637}
1638
1640 return VT.isScalarInteger();
1641}
1642
1644 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1645 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1646 return false;
1647
1648 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1649 if (VTy->getScalarType()->isIntegerTy()) {
1650 // An ElemSizeInBits of 8/16 fits in the immediate field, so it is not needed here.
1651 if (ElemSizeInBits == 32) {
1652 Index = Subtarget.isLittleEndian() ? 2 : 1;
1653 return true;
1654 }
1655 if (ElemSizeInBits == 64) {
1656 Index = Subtarget.isLittleEndian() ? 1 : 0;
1657 return true;
1658 }
1659 }
1660 }
1661 return false;
1662}
1663
1664const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1665 switch ((PPCISD::NodeType)Opcode) {
1666 case PPCISD::FIRST_NUMBER: break;
1667 case PPCISD::FSEL: return "PPCISD::FSEL";
1668 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1669 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1670 case PPCISD::FCFID: return "PPCISD::FCFID";
1671 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1672 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1673 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1674 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1675 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1676 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1677 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1678 case PPCISD::FRE: return "PPCISD::FRE";
1679 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1680 case PPCISD::FTSQRT:
1681 return "PPCISD::FTSQRT";
1682 case PPCISD::FSQRT:
1683 return "PPCISD::FSQRT";
1684 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1685 case PPCISD::VPERM: return "PPCISD::VPERM";
1686 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1688 return "PPCISD::XXSPLTI_SP_TO_DP";
1690 return "PPCISD::XXSPLTI32DX";
1691 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1692 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1693 case PPCISD::XXPERM:
1694 return "PPCISD::XXPERM";
1695 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1696 case PPCISD::VSRQ:
1697 return "PPCISD::VSRQ";
1698 case PPCISD::CMPB: return "PPCISD::CMPB";
1699 case PPCISD::Hi: return "PPCISD::Hi";
1700 case PPCISD::Lo: return "PPCISD::Lo";
1701 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1702 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1703 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1704 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1705 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1706 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1707 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1708 case PPCISD::SRL: return "PPCISD::SRL";
1709 case PPCISD::SRA: return "PPCISD::SRA";
1710 case PPCISD::SHL: return "PPCISD::SHL";
1711 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1712 case PPCISD::CALL: return "PPCISD::CALL";
1713 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1714 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1715 case PPCISD::CALL_RM:
1716 return "PPCISD::CALL_RM";
1718 return "PPCISD::CALL_NOP_RM";
1720 return "PPCISD::CALL_NOTOC_RM";
1721 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1722 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1723 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1724 case PPCISD::BCTRL_RM:
1725 return "PPCISD::BCTRL_RM";
1727 return "PPCISD::BCTRL_LOAD_TOC_RM";
1728 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1729 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1730 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1731 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1732 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1733 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1734 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1735 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1736 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1737 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1739 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1741 return "PPCISD::ANDI_rec_1_EQ_BIT";
1743 return "PPCISD::ANDI_rec_1_GT_BIT";
1744 case PPCISD::VCMP: return "PPCISD::VCMP";
1745 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1746 case PPCISD::LBRX: return "PPCISD::LBRX";
1747 case PPCISD::STBRX: return "PPCISD::STBRX";
1748 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1749 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1750 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1751 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1752 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1753 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1754 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1755 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1756 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1758 return "PPCISD::ST_VSR_SCAL_INT";
1759 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1760 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1761 case PPCISD::BDZ: return "PPCISD::BDZ";
1762 case PPCISD::MFFS: return "PPCISD::MFFS";
1763 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1764 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1765 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1766 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1767 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1768 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1769 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1770 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1771 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1772 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1773 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1774 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1775 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1776 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1777 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1778 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1779 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1780 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1781 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1782 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1783 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1784 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1785 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1787 return "PPCISD::PADDI_DTPREL";
1788 case PPCISD::VADD_SPLAT:
1789 return "PPCISD::VADD_SPLAT";
1790 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1791 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1792 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1793 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1794 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1795 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1796 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1797 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1798 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1800 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1802 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1803 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1804 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1805 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1806 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1807 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1808 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1809 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1810 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1812 return "PPCISD::STRICT_FADDRTZ";
1814 return "PPCISD::STRICT_FCTIDZ";
1816 return "PPCISD::STRICT_FCTIWZ";
1818 return "PPCISD::STRICT_FCTIDUZ";
1820 return "PPCISD::STRICT_FCTIWUZ";
1822 return "PPCISD::STRICT_FCFID";
1824 return "PPCISD::STRICT_FCFIDU";
1826 return "PPCISD::STRICT_FCFIDS";
1828 return "PPCISD::STRICT_FCFIDUS";
1829 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1830 case PPCISD::STORE_COND:
1831 return "PPCISD::STORE_COND";
1832 case PPCISD::SETBC:
1833 return "PPCISD::SETBC";
1834 case PPCISD::SETBCR:
1835 return "PPCISD::SETBCR";
1836 case PPCISD::ADDC:
1837 return "PPCISD::ADDC";
1838 case PPCISD::ADDE:
1839 return "PPCISD::ADDE";
1840 case PPCISD::SUBC:
1841 return "PPCISD::SUBC";
1842 case PPCISD::SUBE:
1843 return "PPCISD::SUBE";
1844 }
1845 return nullptr;
1846}
1847
1849 EVT VT) const {
1850 if (!VT.isVector())
1851 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1852
1854}
1855
1857 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1858 return true;
1859}
1860
1861//===----------------------------------------------------------------------===//
1862// Node matching predicates, for use by the tblgen matching code.
1863//===----------------------------------------------------------------------===//
1864
1865/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1867 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1868 return CFP->getValueAPF().isZero();
1869 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1870 // Maybe this has already been legalized into the constant pool?
1871 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1872 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1873 return CFP->getValueAPF().isZero();
1874 }
1875 return false;
1876}
1877
1878/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1879/// true if Op is undef or if it matches the specified value.
1880static bool isConstantOrUndef(int Op, int Val) {
1881 return Op < 0 || Op == Val;
1882}
1883
1884/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1885/// VPKUHUM instruction.
1886/// The ShuffleKind distinguishes between big-endian operations with
1887/// two different inputs (0), either-endian operations with two identical
1888/// inputs (1), and little-endian operations with two different inputs (2).
1889/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
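/// For example, with ShuffleKind 0 on a big-endian target, the expected mask
/// is <1,3,5,...,31>, i.e. the odd bytes of the two concatenated inputs (the
/// low-order byte of each halfword).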
1891 SelectionDAG &DAG) {
1892 bool IsLE = DAG.getDataLayout().isLittleEndian();
1893 if (ShuffleKind == 0) {
1894 if (IsLE)
1895 return false;
1896 for (unsigned i = 0; i != 16; ++i)
1897 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1898 return false;
1899 } else if (ShuffleKind == 2) {
1900 if (!IsLE)
1901 return false;
1902 for (unsigned i = 0; i != 16; ++i)
1903 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1904 return false;
1905 } else if (ShuffleKind == 1) {
1906 unsigned j = IsLE ? 0 : 1;
1907 for (unsigned i = 0; i != 8; ++i)
1908 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1909 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1910 return false;
1911 }
1912 return true;
1913}
1914
1915/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1916/// VPKUWUM instruction.
1917/// The ShuffleKind distinguishes between big-endian operations with
1918/// two different inputs (0), either-endian operations with two identical
1919/// inputs (1), and little-endian operations with two different inputs (2).
1920/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
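/// For example, with ShuffleKind 0 on a big-endian target, the expected mask
/// is <2,3,6,7,...,30,31>, i.e. the low-order halfword of each word of the two
/// concatenated inputs.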
1922 SelectionDAG &DAG) {
1923 bool IsLE = DAG.getDataLayout().isLittleEndian();
1924 if (ShuffleKind == 0) {
1925 if (IsLE)
1926 return false;
1927 for (unsigned i = 0; i != 16; i += 2)
1928 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1929 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1930 return false;
1931 } else if (ShuffleKind == 2) {
1932 if (!IsLE)
1933 return false;
1934 for (unsigned i = 0; i != 16; i += 2)
1935 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1936 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1937 return false;
1938 } else if (ShuffleKind == 1) {
1939 unsigned j = IsLE ? 0 : 2;
1940 for (unsigned i = 0; i != 8; i += 2)
1941 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1942 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1943 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1944 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1945 return false;
1946 }
1947 return true;
1948}
1949
1950/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1951/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1952/// current subtarget.
1953///
1954/// The ShuffleKind distinguishes between big-endian operations with
1955/// two different inputs (0), either-endian operations with two identical
1956/// inputs (1), and little-endian operations with two different inputs (2).
1957/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
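/// For example, with ShuffleKind 0 on a big-endian target, the expected mask
/// is <4,5,6,7,12,...,28,29,30,31>, i.e. the low-order word of each doubleword
/// of the two concatenated inputs.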
1959 SelectionDAG &DAG) {
1960 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1961 if (!Subtarget.hasP8Vector())
1962 return false;
1963
1964 bool IsLE = DAG.getDataLayout().isLittleEndian();
1965 if (ShuffleKind == 0) {
1966 if (IsLE)
1967 return false;
1968 for (unsigned i = 0; i != 16; i += 4)
1969 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1970 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1971 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1972 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1973 return false;
1974 } else if (ShuffleKind == 2) {
1975 if (!IsLE)
1976 return false;
1977 for (unsigned i = 0; i != 16; i += 4)
1978 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1979 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1980 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1981 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1982 return false;
1983 } else if (ShuffleKind == 1) {
1984 unsigned j = IsLE ? 0 : 4;
1985 for (unsigned i = 0; i != 8; i += 4)
1986 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1987 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1988 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1989 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1990 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1991 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1992 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1993 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1994 return false;
1995 }
1996 return true;
1997}
1998
1999/// isVMerge - Common function, used to match vmrg* shuffles.
2000///
2001static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2002 unsigned LHSStart, unsigned RHSStart) {
2003 if (N->getValueType(0) != MVT::v16i8)
2004 return false;
2005 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2006 "Unsupported merge size!");
2007
2008 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2009 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2010 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2011 LHSStart+j+i*UnitSize) ||
2012 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2013 RHSStart+j+i*UnitSize))
2014 return false;
2015 }
2016 return true;
2017}
2018
2019/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2020/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2021/// The ShuffleKind distinguishes between big-endian merges with two
2022/// different inputs (0), either-endian merges with two identical inputs (1),
2023/// and little-endian merges with two different inputs (2). For the latter,
2024/// the input operands are swapped (see PPCInstrAltivec.td).
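/// For example, a big-endian VMRGLB merge with two different inputs
/// (ShuffleKind 0, UnitSize 1) expects the mask <8,24,9,25,...,15,31>,
/// interleaving the low halves of the two inputs byte by byte.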
2026 unsigned ShuffleKind, SelectionDAG &DAG) {
2027 if (DAG.getDataLayout().isLittleEndian()) {
2028 if (ShuffleKind == 1) // unary
2029 return isVMerge(N, UnitSize, 0, 0);
2030 else if (ShuffleKind == 2) // swapped
2031 return isVMerge(N, UnitSize, 0, 16);
2032 else
2033 return false;
2034 } else {
2035 if (ShuffleKind == 1) // unary
2036 return isVMerge(N, UnitSize, 8, 8);
2037 else if (ShuffleKind == 0) // normal
2038 return isVMerge(N, UnitSize, 8, 24);
2039 else
2040 return false;
2041 }
2042}
2043
2044/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2045/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2046/// The ShuffleKind distinguishes between big-endian merges with two
2047/// different inputs (0), either-endian merges with two identical inputs (1),
2048/// and little-endian merges with two different inputs (2). For the latter,
2049/// the input operands are swapped (see PPCInstrAltivec.td).
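/// For example, a big-endian VMRGHB merge with two different inputs
/// (ShuffleKind 0, UnitSize 1) expects the mask <0,16,1,17,...,7,23>,
/// interleaving the high halves of the two inputs byte by byte.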
2051 unsigned ShuffleKind, SelectionDAG &DAG) {
2052 if (DAG.getDataLayout().isLittleEndian()) {
2053 if (ShuffleKind == 1) // unary
2054 return isVMerge(N, UnitSize, 8, 8);
2055 else if (ShuffleKind == 2) // swapped
2056 return isVMerge(N, UnitSize, 8, 24);
2057 else
2058 return false;
2059 } else {
2060 if (ShuffleKind == 1) // unary
2061 return isVMerge(N, UnitSize, 0, 0);
2062 else if (ShuffleKind == 0) // normal
2063 return isVMerge(N, UnitSize, 0, 16);
2064 else
2065 return false;
2066 }
2067}
2068
2069/**
2070 * Common function used to match vmrgew and vmrgow shuffles
2071 *
2072 * The indexOffset determines whether to look for even or odd words in
2073 * the shuffle mask. This is based on the endianness of the target
2074 * machine.
2075 * - Little Endian:
2076 * - Use offset of 0 to check for odd elements
2077 * - Use offset of 4 to check for even elements
2078 * - Big Endian:
2079 * - Use offset of 0 to check for even elements
2080 * - Use offset of 4 to check for odd elements
2081 * A detailed description of the vector element ordering for little endian and
2082 * big endian can be found at
2083 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2084 * Targeting your applications - what little endian and big endian IBM XL C/C++
2085 * compiler differences mean to you
2086 *
2087 * The mask to the shuffle vector instruction specifies the indices of the
2088 * elements from the two input vectors to place in the result. The elements are
2089 * numbered in array-access order, starting with the first vector. These vectors
2090 * are always of type v16i8, thus each vector will contain 16 byte-sized
2091 * elements. More info on the shuffle vector can be found in the
2092 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2093 * Language Reference.
2094 *
2095 * The RHSStartValue indicates whether the same input vectors are used (unary)
2096 * or two different input vectors are used, based on the following:
2097 * - If the instruction uses the same vector for both inputs, the range of the
2098 * indices will be 0 to 15. In this case, the RHSStart value passed should
2099 * be 0.
2100 * - If the instruction has two different vectors then the range of the
2101 * indices will be 0 to 31. In this case, the RHSStart value passed should
2102 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2103 * to 31 specify elements in the second vector).
2104 *
2105 * \param[in] N The shuffle vector SD Node to analyze
2106 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2107 * \param[in] RHSStartValue Specifies the starting index for the right-hand input
2108 * vector to the shuffle_vector instruction
2109 * \return true iff this shuffle vector represents an even or odd word merge
2110 */
2111static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2112 unsigned RHSStartValue) {
2113 if (N->getValueType(0) != MVT::v16i8)
2114 return false;
2115
2116 for (unsigned i = 0; i < 2; ++i)
2117 for (unsigned j = 0; j < 4; ++j)
2118 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2119 i*RHSStartValue+j+IndexOffset) ||
2120 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2121 i*RHSStartValue+j+IndexOffset+8))
2122 return false;
2123 return true;
2124}
2125
2126/**
2127 * Determine if the specified shuffle mask is suitable for the vmrgew or
2128 * vmrgow instructions.
2129 *
2130 * \param[in] N The shuffle vector SD Node to analyze
2131 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2132 * \param[in] ShuffleKind Identify the type of merge:
2133 * - 0 = big-endian merge with two different inputs;
2134 * - 1 = either-endian merge with two identical inputs;
2135 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2136 * little-endian merges).
2137 * \param[in] DAG The current SelectionDAG
2138 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
2139 */
2141 unsigned ShuffleKind, SelectionDAG &DAG) {
2142 if (DAG.getDataLayout().isLittleEndian()) {
2143 unsigned indexOffset = CheckEven ? 4 : 0;
2144 if (ShuffleKind == 1) // Unary
2145 return isVMerge(N, indexOffset, 0);
2146 else if (ShuffleKind == 2) // swapped
2147 return isVMerge(N, indexOffset, 16);
2148 else
2149 return false;
2150 }
2151 else {
2152 unsigned indexOffset = CheckEven ? 0 : 4;
2153 if (ShuffleKind == 1) // Unary
2154 return isVMerge(N, indexOffset, 0);
2155 else if (ShuffleKind == 0) // Normal
2156 return isVMerge(N, indexOffset, 16);
2157 else
2158 return false;
2159 }
2160 return false;
2161}
2162
2163/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2164/// amount, otherwise return -1.
2165/// The ShuffleKind distinguishes between big-endian operations with two
2166/// different inputs (0), either-endian operations with two identical inputs
2167/// (1), and little-endian operations with two different inputs (2). For the
2168/// latter, the input operands are swapped (see PPCInstrAltivec.td).
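/// For example, on a big-endian target (ShuffleKind 0) the mask
/// <3,4,5,...,18> corresponds to a vsldoi shift amount of 3.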
2169int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2170 SelectionDAG &DAG) {
2171 if (N->getValueType(0) != MVT::v16i8)
2172 return -1;
2173
2174 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2175
2176 // Find the first non-undef value in the shuffle mask.
2177 unsigned i;
2178 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2179 /*search*/;
2180
2181 if (i == 16) return -1; // all undef.
2182
2183 // Otherwise, check to see if the rest of the elements are consecutively
2184 // numbered from this value.
2185 unsigned ShiftAmt = SVOp->getMaskElt(i);
2186 if (ShiftAmt < i) return -1;
2187
2188 ShiftAmt -= i;
2189 bool isLE = DAG.getDataLayout().isLittleEndian();
2190
2191 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2192 // Check the rest of the elements to see if they are consecutive.
2193 for (++i; i != 16; ++i)
2194 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2195 return -1;
2196 } else if (ShuffleKind == 1) {
2197 // Check the rest of the elements to see if they are consecutive.
2198 for (++i; i != 16; ++i)
2199 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2200 return -1;
2201 } else
2202 return -1;
2203
2204 if (isLE)
2205 ShiftAmt = 16 - ShiftAmt;
2206
2207 return ShiftAmt;
2208}
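// For example, with ShuffleKind == 0 on a big-endian target the mask
//   {3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18}
// is consecutive starting at 3, so the routine returns a shift amount of 3
// (a vsldoi by three bytes); on little-endian targets the returned amount is
// mirrored to 16 - ShiftAmt.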
2209
2210/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2211/// specifies a splat of a single element that is suitable for input to
2212/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2213bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2214 EVT VT = N->getValueType(0);
2215 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2216 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2217
2218 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2219 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2220
2221 // The consecutive indices need to specify an element, not part of two
2222 // different elements. So abandon ship early if this isn't the case.
2223 if (N->getMaskElt(0) % EltSize != 0)
2224 return false;
2225
2226 // This is a splat operation if each element of the permute is the same, and
2227 // if the value doesn't reference the second vector.
2228 unsigned ElementBase = N->getMaskElt(0);
2229
2230 // FIXME: Handle UNDEF elements too!
2231 if (ElementBase >= 16)
2232 return false;
2233
2234 // Check that the indices are consecutive, in the case of a multi-byte element
2235 // splatted with a v16i8 mask.
2236 for (unsigned i = 1; i != EltSize; ++i)
2237 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2238 return false;
2239
2240 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2241 // An UNDEF element is a sequence of UNDEF bytes.
2242 if (N->getMaskElt(i) < 0) {
2243 for (unsigned j = 1; j != EltSize; ++j)
2244 if (N->getMaskElt(i + j) >= 0)
2245 return false;
2246 } else
2247 for (unsigned j = 0; j != EltSize; ++j)
2248 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2249 return false;
2250 }
2251 return true;
2252}
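// For example, with EltSize == 4 the mask
//   {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// passes these checks: every four-byte group names bytes 4..7 of the first
// input, so the shuffle is a splat of word 1.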
2253
2254/// Check that the mask is shuffling N byte elements. Within each N byte
2255/// element of the mask, the indices could be either in increasing or
2256/// decreasing order as long as they are consecutive.
2257/// \param[in] N the shuffle vector SD Node to analyze
2258/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2259/// Word/DoubleWord/QuadWord).
2260/// \param[in] StepLen the step between consecutive indices within each N byte
2261/// element: 1 if the mask is in increasing order, -1 if it is decreasing.
2262/// \return true iff the mask is shuffling N byte elements.
2263static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2264 int StepLen) {
2265 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2266 "Unexpected element width.");
2267 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2268
2269 unsigned NumOfElem = 16 / Width;
2270 unsigned MaskVal[16]; // Width is never greater than 16
2271 for (unsigned i = 0; i < NumOfElem; ++i) {
2272 MaskVal[0] = N->getMaskElt(i * Width);
2273 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2274 return false;
2275 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2276 return false;
2277 }
2278
2279 for (unsigned int j = 1; j < Width; ++j) {
2280 MaskVal[j] = N->getMaskElt(i * Width + j);
2281 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2282 return false;
2283 }
2284 }
2285 }
2286
2287 return true;
2288}
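// For example, with Width == 4 and StepLen == 1 the mask
//   {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7}
// is accepted (the bytes of each word increase consecutively), while with
// Width == 4 and StepLen == -1 the accepted form is byte-reversed within each
// word, e.g. {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.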
2289
2290bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2291 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2292 if (!isNByteElemShuffleMask(N, 4, 1))
2293 return false;
2294
2295 // Now we look at mask elements 0,4,8,12
2296 unsigned M0 = N->getMaskElt(0) / 4;
2297 unsigned M1 = N->getMaskElt(4) / 4;
2298 unsigned M2 = N->getMaskElt(8) / 4;
2299 unsigned M3 = N->getMaskElt(12) / 4;
2300 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2301 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2302
2303 // Below, let H and L be arbitrary elements of the shuffle mask
2304 // where H is in the range [4,7] and L is in the range [0,3].
2305 // H, 1, 2, 3 or L, 5, 6, 7
2306 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2307 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2308 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2309 InsertAtByte = IsLE ? 12 : 0;
2310 Swap = M0 < 4;
2311 return true;
2312 }
2313 // 0, H, 2, 3 or 4, L, 6, 7
2314 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2315 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2316 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2317 InsertAtByte = IsLE ? 8 : 4;
2318 Swap = M1 < 4;
2319 return true;
2320 }
2321 // 0, 1, H, 3 or 4, 5, L, 7
2322 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2323 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2324 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2325 InsertAtByte = IsLE ? 4 : 8;
2326 Swap = M2 < 4;
2327 return true;
2328 }
2329 // 0, 1, 2, H or 4, 5, 6, L
2330 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2331 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2332 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2333 InsertAtByte = IsLE ? 0 : 12;
2334 Swap = M3 < 4;
2335 return true;
2336 }
2337
2338 // If both vector operands for the shuffle are the same vector, the mask will
2339 // contain only elements from the first one and the second one will be undef.
2340 if (N->getOperand(1).isUndef()) {
2341 ShiftElts = 0;
2342 Swap = true;
2343 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2344 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2345 InsertAtByte = IsLE ? 12 : 0;
2346 return true;
2347 }
2348 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2349 InsertAtByte = IsLE ? 8 : 4;
2350 return true;
2351 }
2352 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2353 InsertAtByte = IsLE ? 4 : 8;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2357 InsertAtByte = IsLE ? 0 : 12;
2358 return true;
2359 }
2360 }
2361
2362 return false;
2363}
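// For example (little-endian), the word-level pattern 0, H, 2, 3 with H == 5,
// i.e. the byte mask {0,1,2,3, 20,21,22,23, 8,9,10,11, 12,13,14,15}, matches
// the second case above and yields ShiftElts == 1, InsertAtByte == 8 and
// Swap == false: word 1 of the result is replaced by a word taken from the
// other input, which is exactly what XXINSERTW implements.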
2364
2365bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2366 bool &Swap, bool IsLE) {
2367 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2368 // Ensure each byte index of the word is consecutive.
2369 if (!isNByteElemShuffleMask(N, 4, 1))
2370 return false;
2371
2372 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2373 unsigned M0 = N->getMaskElt(0) / 4;
2374 unsigned M1 = N->getMaskElt(4) / 4;
2375 unsigned M2 = N->getMaskElt(8) / 4;
2376 unsigned M3 = N->getMaskElt(12) / 4;
2377
2378 // If both vector operands for the shuffle are the same vector, the mask will
2379 // contain only elements from the first one and the second one will be undef.
2380 if (N->getOperand(1).isUndef()) {
2381 assert(M0 < 4 && "Indexing into an undef vector?");
2382 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2383 return false;
2384
2385 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2386 Swap = false;
2387 return true;
2388 }
2389
2390 // Ensure each word index of the ShuffleVector Mask is consecutive.
2391 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2392 return false;
2393
2394 if (IsLE) {
2395 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2396 // Input vectors don't need to be swapped if the leading element
2397 // of the result is one of the 3 left elements of the second vector
2398 // (or if there is no shift to be done at all).
2399 Swap = false;
2400 ShiftElts = (8 - M0) % 8;
2401 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2402 // Input vectors need to be swapped if the leading element
2403 // of the result is one of the 3 left elements of the first vector
2404 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2405 Swap = true;
2406 ShiftElts = (4 - M0) % 4;
2407 }
2408
2409 return true;
2410 } else { // BE
2411 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2412 // Input vectors don't need to be swapped if the leading element
2413 // of the result is one of the 4 elements of the first vector.
2414 Swap = false;
2415 ShiftElts = M0;
2416 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2417 // Input vectors need to be swapped if the leading element
2418 // of the result is one of the 4 elements of the right vector.
2419 Swap = true;
2420 ShiftElts = M0 - 4;
2421 }
2422
2423 return true;
2424 }
2425}
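// For example (little-endian, two distinct inputs), the byte mask
//   {28,29,30,31, 0,1,2,3, 4,5,6,7, 8,9,10,11}
// gives word indices M0..M3 = 7,0,1,2, which are consecutive modulo 8, so the
// routine reports ShiftElts == 1 with Swap == false - an xxsldwi by one word.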
2426
2427static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, unsigned Width) {
2428 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2429
2430 if (!isNByteElemShuffleMask(N, Width, -1))
2431 return false;
2432
2433 for (int i = 0; i < 16; i += Width)
2434 if (N->getMaskElt(i) != i + Width - 1)
2435 return false;
2436
2437 return true;
2438}
2439
2440bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2441 return isXXBRShuffleMaskHelper(N, 2);
2442}
2443
2444bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2445 return isXXBRShuffleMaskHelper(N, 4);
2446}
2447
2448bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, 8);
2450}
2451
2452bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, 16);
2454}
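// For example, isXXBRWShuffleMask accepts exactly the word-granularity byte
// reversal {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12} (the XXBRW pattern);
// the H, D and Q variants are the same idea at 2-, 8- and 16-byte granularity.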
2455
2456/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2457/// if the inputs to the instruction should be swapped and set \p DM to the
2458/// value for the immediate.
2459/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2460/// AND element 0 of the result comes from the first input (LE) or second input
2461/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2462/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2463/// mask.
2464bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2465 bool &Swap, bool IsLE) {
2466 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2467
2468 // Ensure each byte index of the double word is consecutive.
2469 if (!isNByteElemShuffleMask(N, 8, 1))
2470 return false;
2471
2472 unsigned M0 = N->getMaskElt(0) / 8;
2473 unsigned M1 = N->getMaskElt(8) / 8;
2474 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2475
2476 // If both vector operands for the shuffle are the same vector, the mask will
2477 // contain only elements from the first one and the second one will be undef.
2478 if (N->getOperand(1).isUndef()) {
2479 if ((M0 | M1) < 2) {
2480 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2481 Swap = false;
2482 return true;
2483 } else
2484 return false;
2485 }
2486
2487 if (IsLE) {
2488 if (M0 > 1 && M1 < 2) {
2489 Swap = false;
2490 } else if (M0 < 2 && M1 > 1) {
2491 M0 = (M0 + 2) % 4;
2492 M1 = (M1 + 2) % 4;
2493 Swap = true;
2494 } else
2495 return false;
2496
2497 // Note: if control flow comes here that means Swap is already set above
2498 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2499 return true;
2500 } else { // BE
2501 if (M0 < 2 && M1 > 1) {
2502 Swap = false;
2503 } else if (M0 > 1 && M1 < 2) {
2504 M0 = (M0 + 2) % 4;
2505 M1 = (M1 + 2) % 4;
2506 Swap = true;
2507 } else
2508 return false;
2509
2510 // Note: if control flow comes here that means Swap is already set above
2511 DM = (M0 << 1) + (M1 & 1);
2512 return true;
2513 }
2514}
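// For example, the single-input mask
//   {8,9,10,11,12,13,14,15, 0,1,2,3,4,5,6,7}
// swaps the two doublewords, giving M0 == 1 and M1 == 0; both the LE and BE
// formulas above then produce DM == 2, the xxpermdi encoding of a doubleword
// swap (the xxswapd idiom).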
2515
2516
2517/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2518/// appropriate for PPC mnemonics (which have a big endian bias - namely
2519/// elements are counted from the left of the vector register).
2520unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2521 SelectionDAG &DAG) {
2522 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2523 assert(isSplatShuffleMask(SVOp, EltSize));
2524 EVT VT = SVOp->getValueType(0);
2525
2526 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2527 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2528 : SVOp->getMaskElt(0);
2529
2530 if (DAG.getDataLayout().isLittleEndian())
2531 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2532 else
2533 return SVOp->getMaskElt(0) / EltSize;
2534}
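// For example, for the word-splat mask {4,5,6,7, 4,5,6,7, ...} (EltSize == 4,
// first mask element 4) this returns 1 on big-endian targets and
// (16 / 4) - 1 - 1 == 2 on little-endian targets, matching the left-to-right
// element numbering used by the vspltw/xxspltw mnemonics.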
2535
2536/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2537/// by using a vspltis[bhw] instruction of the specified element size, return
2538/// the constant being splatted. The ByteSize field indicates the number of
2539/// bytes of each element [124] -> [bhw].
2540SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2541 SDValue OpVal;
2542
2543 // If ByteSize of the splat is bigger than the element size of the
2544 // build_vector, then we have a case where we are checking for a splat where
2545 // multiple elements of the buildvector are folded together into a single
2546 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2547 unsigned EltSize = 16/N->getNumOperands();
2548 if (EltSize < ByteSize) {
2549 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2550 SDValue UniquedVals[4];
2551 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2552
2553 // See if all of the elements in the buildvector agree across.
2554 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2555 if (N->getOperand(i).isUndef()) continue;
2556 // If the element isn't a constant, bail fully out.
2557 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2558
2559 if (!UniquedVals[i&(Multiple-1)].getNode())
2560 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2561 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2562 return SDValue(); // no match.
2563 }
2564
2565 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2566 // either constant or undef values that are identical for each chunk. See
2567 // if these chunks can form into a larger vspltis*.
2568
2569 // Check to see if all of the leading entries are either 0 or -1. If
2570 // neither, then this won't fit into the immediate field.
2571 bool LeadingZero = true;
2572 bool LeadingOnes = true;
2573 for (unsigned i = 0; i != Multiple-1; ++i) {
2574 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2575
2576 LeadingZero &= isNullConstant(UniquedVals[i]);
2577 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2578 }
2579 // Finally, check the least significant entry.
2580 if (LeadingZero) {
2581 if (!UniquedVals[Multiple-1].getNode())
2582 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2583 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2584 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2585 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2586 }
2587 if (LeadingOnes) {
2588 if (!UniquedVals[Multiple-1].getNode())
2589 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2590 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2591 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2592 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2593 }
2594
2595 return SDValue();
2596 }
2597
2598 // Check to see if this buildvec has a single non-undef value in its elements.
2599 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2600 if (N->getOperand(i).isUndef()) continue;
2601 if (!OpVal.getNode())
2602 OpVal = N->getOperand(i);
2603 else if (OpVal != N->getOperand(i))
2604 return SDValue();
2605 }
2606
2607 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2608
2609 unsigned ValSizeInBytes = EltSize;
2610 uint64_t Value = 0;
2611 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2612 Value = CN->getZExtValue();
2613 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2614 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2615 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2616 }
2617
2618 // If the splat value is larger than the element value, then we can never do
2619 // this splat. The only case that we could fit the replicated bits into our
2620 // immediate field for would be zero, and we prefer to use vxor for it.
2621 if (ValSizeInBytes < ByteSize) return SDValue();
2622
2623 // If the element value is larger than the splat value, check if it consists
2624 // of a repeated bit pattern of size ByteSize.
2625 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2626 return SDValue();
2627
2628 // Properly sign extend the value.
2629 int MaskVal = SignExtend32(Value, ByteSize * 8);
2630
2631 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2632 if (MaskVal == 0) return SDValue();
2633
2634 // Finally, if this value fits in a 5 bit sext field, return it
2635 if (SignExtend32<5>(MaskVal) == MaskVal)
2636 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2637 return SDValue();
2638}
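// For example, a v16i8 build_vector whose elements are all 0xFE yields the
// constant -2 for ByteSize == 1 (vspltisb -2), and a v8i16 build_vector whose
// elements are all 0x0505 yields 5 for ByteSize == 1, because the 16-bit
// value is a repeated 8-bit pattern.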
2639
2640//===----------------------------------------------------------------------===//
2641// Addressing Mode Selection
2642//===----------------------------------------------------------------------===//
2643
2644/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2645/// or 64-bit immediate, and if the value can be accurately represented as a
2646/// sign extension from a 16-bit value. If so, this returns true and the
2647/// immediate.
2648bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2649 if (!isa<ConstantSDNode>(N))
2650 return false;
2651
2652 Imm = (int16_t)N->getAsZExtVal();
2653 if (N->getValueType(0) == MVT::i32)
2654 return Imm == (int32_t)N->getAsZExtVal();
2655 else
2656 return Imm == (int64_t)N->getAsZExtVal();
2657}
2658bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2659 return isIntS16Immediate(Op.getNode(), Imm);
2660}
2661
2662/// Used when computing address flags for selecting loads and stores.
2663/// If we have an OR, check if the LHS and RHS are provably disjoint.
2664/// An OR of two provably disjoint values is equivalent to an ADD.
2665/// Most PPC load/store instructions compute the effective address as a sum,
2666/// so doing this conversion is useful.
2667static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2668 if (N.getOpcode() != ISD::OR)
2669 return false;
2670 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2671 if (!LHSKnown.Zero.getBoolValue())
2672 return false;
2673 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2674 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2675}
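// For example, if the left operand is known to have its low four bits clear
// (say, a pointer aligned to 16) and the right operand is the constant 7,
// every bit position is known zero on at least one side, so the OR can never
// carry and is equivalent to an ADD for addressing purposes.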
2676
2677/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2678/// be represented as an indexed [r+r] operation.
2679bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2680 SDValue &Index,
2681 SelectionDAG &DAG) const {
2682 for (SDNode *U : N->users()) {
2683 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2684 if (Memop->getMemoryVT() == MVT::f64) {
2685 Base = N.getOperand(0);
2686 Index = N.getOperand(1);
2687 return true;
2688 }
2689 }
2690 }
2691 return false;
2692}
2693
2694/// isIntS34Immediate - This method tests whether the value of the given node
2695/// can be accurately represented as a sign extension from a 34-bit value. If
2696/// so, this returns true and the immediate.
2697bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2698 if (!isa<ConstantSDNode>(N))
2699 return false;
2700
2701 Imm = (int64_t)cast<ConstantSDNode>(N)->getSExtValue();
2702 return isInt<34>(Imm);
2703}
2704bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2705 return isIntS34Immediate(Op.getNode(), Imm);
2706}
2707
2708/// SelectAddressRegReg - Given the specified address, check to see if it
2709/// can be represented as an indexed [r+r] operation. Returns false if it
2710/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2711/// non-zero and N can be represented by a base register plus a signed 16-bit
2712/// displacement, make a more precise judgement by checking (displacement % \p
2713/// EncodingAlignment).
2714bool PPCTargetLowering::SelectAddressRegReg(
2715 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2716 MaybeAlign EncodingAlignment) const {
2717 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2718 // a [pc+imm].
2719 if (SelectAddressPCRel(N, Base))
2720 return false;
2721
2722 int16_t Imm = 0;
2723 if (N.getOpcode() == ISD::ADD) {
2724 // Is this an SPE f64 load/store, which can't handle a 16-bit offset?
2725 // SPE load/store can only handle 8-bit offsets.
2726 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2727 return true;
2728 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2729 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2730 return false; // r+i
2731 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2732 return false; // r+i
2733
2734 Base = N.getOperand(0);
2735 Index = N.getOperand(1);
2736 return true;
2737 } else if (N.getOpcode() == ISD::OR) {
2738 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2739 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2740 return false; // r+i can fold it if we can.
2741
2742 // If this is an or of disjoint bitfields, we can codegen this as an add
2743 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2744 // disjoint.
2745 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2746
2747 if (LHSKnown.Zero.getBoolValue()) {
2748 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2749 // If all of the bits are known zero on the LHS or RHS, the add won't
2750 // carry.
2751 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2752 Base = N.getOperand(0);
2753 Index = N.getOperand(1);
2754 return true;
2755 }
2756 }
2757 }
2758
2759 return false;
2760}
2761
2762// If we happen to be doing an i64 load or store into a stack slot that has
2763// less than a 4-byte alignment, then the frame-index elimination may need to
2764// use an indexed load or store instruction (because the offset may not be a
2765// multiple of 4). The extra register needed to hold the offset comes from the
2766// register scavenger, and it is possible that the scavenger will need to use
2767// an emergency spill slot. As a result, we need to make sure that a spill slot
2768// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2769// stack slot.
2770static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2771 // FIXME: This does not handle the LWA case.
2772 if (VT != MVT::i64)
2773 return;
2774
2775 // NOTE: We'll exclude negative FIs here, which come from argument
2776 // lowering, because there are no known test cases triggering this problem
2777 // using packed structures (or similar). We can remove this exclusion if
2778 // we find such a test case. The reason why this is so test-case driven is
2779 // because this entire 'fixup' is only to prevent crashes (from the
2780 // register scavenger) on not-really-valid inputs. For example, if we have:
2781 // %a = alloca i1
2782 // %b = bitcast i1* %a to i64*
2783 // store i64 %v, i64* %b
2784 // then the store should really be marked as 'align 1', but is not. If it
2785 // were marked as 'align 1' then the indexed form would have been
2786 // instruction-selected initially, and the problem this 'fixup' is preventing
2787 // won't happen regardless.
2788 if (FrameIdx < 0)
2789 return;
2790
2792 MachineFrameInfo &MFI = MF.getFrameInfo();
2793
2794 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2795 return;
2796
2797 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2798 FuncInfo->setHasNonRISpills();
2799}
2800
2801/// Returns true if the address N can be represented by a base register plus
2802/// a signed 16-bit displacement [r+imm], and if it is not better
2803/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2804/// displacements that are multiples of that value.
2805bool PPCTargetLowering::SelectAddressRegImm(
2806 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2807 MaybeAlign EncodingAlignment) const {
2808 // FIXME dl should come from parent load or store, not from address
2809 SDLoc dl(N);
2810
2811 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2812 // a [pc+imm].
2813 if (SelectAddressPCRel(N, Base))
2814 return false;
2815
2816 // If this can be more profitably realized as r+r, fail.
2817 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2818 return false;
2819
2820 if (N.getOpcode() == ISD::ADD) {
2821 int16_t imm = 0;
2822 if (isIntS16Immediate(N.getOperand(1), imm) &&
2823 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2824 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2825 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2826 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2827 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2828 } else {
2829 Base = N.getOperand(0);
2830 }
2831 return true; // [r+i]
2832 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2833 // Match LOAD (ADD (X, Lo(G))).
2834 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2835 "Cannot handle constant offsets yet!");
2836 Disp = N.getOperand(1).getOperand(0); // The global address.
2837 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2838 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2839 Disp.getOpcode() == ISD::TargetConstantPool ||
2840 Disp.getOpcode() == ISD::TargetJumpTable);
2841 Base = N.getOperand(0);
2842 return true; // [&g+r]
2843 }
2844 } else if (N.getOpcode() == ISD::OR) {
2845 int16_t imm = 0;
2846 if (isIntS16Immediate(N.getOperand(1), imm) &&
2847 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2848 // If this is an or of disjoint bitfields, we can codegen this as an add
2849 // (for better address arithmetic) if the LHS and RHS of the OR are
2850 // provably disjoint.
2851 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2852
2853 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2854 // If all of the bits are known zero on the LHS or RHS, the add won't
2855 // carry.
2856 if (FrameIndexSDNode *FI =
2857 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2858 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2859 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2860 } else {
2861 Base = N.getOperand(0);
2862 }
2863 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2864 return true;
2865 }
2866 }
2867 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2868 // Loading from a constant address.
2869
2870 // If this address fits entirely in a 16-bit sext immediate field, codegen
2871 // this as "d, 0"
2872 int16_t Imm;
2873 if (isIntS16Immediate(CN, Imm) &&
2874 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2875 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2876 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2877 CN->getValueType(0));
2878 return true;
2879 }
2880
2881 // Handle 32-bit sext immediates with LIS + addr mode.
2882 if ((CN->getValueType(0) == MVT::i32 ||
2883 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2884 (!EncodingAlignment ||
2885 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2886 int Addr = (int)CN->getZExtValue();
2887
2888 // Otherwise, break this down into an LIS + disp.
2889 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2890
2891 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2892 MVT::i32);
2893 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2894 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2895 return true;
2896 }
2897 }
2898
2899 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2900 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2901 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2902 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2903 } else
2904 Base = N;
2905 return true; // [r+0]
2906}
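// For example, an access whose address is computed as BasePtr + 20 is matched
// here as the D-form operands Disp = 20, Base = BasePtr (roughly
// "lwz r3, 20(r4)"), while an address built from two non-constant values is
// rejected above in favour of SelectAddressRegReg and the X-form
// ("lwzx r3, r4, r5").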
2907
2908/// Similar to the 16-bit case but for instructions that take a 34-bit
2909/// displacement field (prefixed loads/stores).
2910bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2911 SDValue &Base,
2912 SelectionDAG &DAG) const {
2913 // Only on 64-bit targets.
2914 if (N.getValueType() != MVT::i64)
2915 return false;
2916
2917 SDLoc dl(N);
2918 int64_t Imm = 0;
2919
2920 if (N.getOpcode() == ISD::ADD) {
2921 if (!isIntS34Immediate(N.getOperand(1), Imm))
2922 return false;
2923 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2924 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2925 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2926 else
2927 Base = N.getOperand(0);
2928 return true;
2929 }
2930
2931 if (N.getOpcode() == ISD::OR) {
2932 if (!isIntS34Immediate(N.getOperand(1), Imm))
2933 return false;
2934 // If this is an or of disjoint bitfields, we can codegen this as an add
2935 // (for better address arithmetic) if the LHS and RHS of the OR are
2936 // provably disjoint.
2937 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2938 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2939 return false;
2940 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2941 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2942 else
2943 Base = N.getOperand(0);
2944 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2945 return true;
2946 }
2947
2948 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2949 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2950 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2951 return true;
2952 }
2953
2954 return false;
2955}
2956
2957/// SelectAddressRegRegOnly - Given the specified address, force it to be
2958/// represented as an indexed [r+r] operation.
2959bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2960 SDValue &Index,
2961 SelectionDAG &DAG) const {
2962 // Check to see if we can easily represent this as an [r+r] address. This
2963 // will fail if it thinks that the address is more profitably represented as
2964 // reg+imm, e.g. where imm = 0.
2965 if (SelectAddressRegReg(N, Base, Index, DAG))
2966 return true;
2967
2968 // If the address is the result of an add, we will utilize the fact that the
2969 // address calculation includes an implicit add. However, we can reduce
2970 // register pressure if we do not materialize a constant just for use as the
2971 // index register. We only get rid of the add if it is not an add of a
2972 // value and a 16-bit signed constant and both have a single use.
2973 int16_t imm = 0;
2974 if (N.getOpcode() == ISD::ADD &&
2975 (!isIntS16Immediate(N.getOperand(1), imm) ||
2976 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2977 Base = N.getOperand(0);
2978 Index = N.getOperand(1);
2979 return true;
2980 }
2981
2982 // Otherwise, do it the hard way, using R0 as the base register.
2983 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2984 N.getValueType());
2985 Index = N;
2986 return true;
2987}
2988
2989template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2990 Ty *PCRelCand = dyn_cast<Ty>(N);
2991 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2992}
2993
2994/// Returns true if this address is a PC Relative address.
2995/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2996/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2997bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2998 // This is a materialize PC Relative node. Always select this as PC Relative.
2999 Base = N;
3000 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3001 return true;
3002 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3003 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3004 isValidPCRelNode<JumpTableSDNode>(N) ||
3005 isValidPCRelNode<BlockAddressSDNode>(N))
3006 return true;
3007 return false;
3008}
3009
3010/// Returns true if we should use a direct load into vector instruction
3011/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3012static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3013
3014 // If there are any other uses other than scalar to vector, then we should
3015 // keep it as a scalar load -> direct move pattern to prevent multiple
3016 // loads.
3017 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3018 if (!LD)
3019 return false;
3020
3021 EVT MemVT = LD->getMemoryVT();
3022 if (!MemVT.isSimple())
3023 return false;
3024 switch(MemVT.getSimpleVT().SimpleTy) {
3025 case MVT::i64:
3026 break;
3027 case MVT::i32:
3028 if (!ST.hasP8Vector())
3029 return false;
3030 break;
3031 case MVT::i16:
3032 case MVT::i8:
3033 if (!ST.hasP9Vector())
3034 return false;
3035 break;
3036 default:
3037 return false;
3038 }
3039
3040 SDValue LoadedVal(N, 0);
3041 if (!LoadedVal.hasOneUse())
3042 return false;
3043
3044 for (SDUse &Use : LD->uses())
3045 if (Use.getResNo() == 0 &&
3046 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3047 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3048 return false;
3049
3050 return true;
3051}
3052
3053/// getPreIndexedAddressParts - Returns true (by value) and sets the base
3054/// pointer, offset pointer, and addressing mode (by reference) if the node's
3055/// address can be legally represented as a pre-indexed load/store address.
3056bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3057 SDValue &Offset,
3058 ISD::MemIndexedMode &AM,
3059 SelectionDAG &DAG) const {
3060 if (DisablePPCPreinc) return false;
3061
3062 bool isLoad = true;
3063 SDValue Ptr;
3064 EVT VT;
3065 Align Alignment;
3066 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3067 Ptr = LD->getBasePtr();
3068 VT = LD->getMemoryVT();
3069 Alignment = LD->getAlign();
3070 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3071 Ptr = ST->getBasePtr();
3072 VT = ST->getMemoryVT();
3073 Alignment = ST->getAlign();
3074 isLoad = false;
3075 } else
3076 return false;
3077
3078 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3079 // instructions because we can fold these into a more efficient instruction
3080 // instead, (such as LXSD).
3081 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3082 return false;
3083 }
3084
3085 // PowerPC doesn't have preinc load/store instructions for vectors
3086 if (VT.isVector())
3087 return false;
3088
3089 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3090 // Common code will reject creating a pre-inc form if the base pointer
3091 // is a frame index, or if N is a store and the base pointer is either
3092 // the same as or a predecessor of the value being stored. Check for
3093 // those situations here, and try with swapped Base/Offset instead.
3094 bool Swap = false;
3095
3096 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3097 Swap = true;
3098 else if (!isLoad) {
3099 SDValue Val = cast<StoreSDNode>(N)->getValue();
3100 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3101 Swap = true;
3102 }
3103
3104 if (Swap)
3105 std::swap(Base, Offset);
3106
3107 AM = ISD::PRE_INC;
3108 return true;
3109 }
3110
3111 // LDU/STU can only handle immediates that are a multiple of 4.
3112 if (VT != MVT::i64) {
3113 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3114 return false;
3115 } else {
3116 // LDU/STU need an address with at least 4-byte alignment.
3117 if (Alignment < Align(4))
3118 return false;
3119
3120 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3121 return false;
3122 }
3123
3124 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3125 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3126 // sext i32 to i64 when addr mode is r+i.
3127 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3128 LD->getExtensionType() == ISD::SEXTLOAD &&
3129 isa<ConstantSDNode>(Offset))
3130 return false;
3131 }
3132
3133 AM = ISD::PRE_INC;
3134 return true;
3135}
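// For example, a 4-byte load from r29 + 8 whose incremented address is reused
// afterwards can be selected as the update-form lwzu, which loads from
// r29 + 8 and also writes the new address back into r29, saving a separate
// addi.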
3136
3137//===----------------------------------------------------------------------===//
3138// LowerOperation implementation
3139//===----------------------------------------------------------------------===//
3140
3141/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3142/// and LoOpFlags to the target MO flags.
3143static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3144 unsigned &HiOpFlags, unsigned &LoOpFlags,
3145 const GlobalValue *GV = nullptr) {
3146 HiOpFlags = PPCII::MO_HA;
3147 LoOpFlags = PPCII::MO_LO;
3148
3149 // Don't use the pic base if not in PIC relocation model.
3150 if (IsPIC) {
3151 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3152 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3153 }
3154}
3155
3156static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3157 SelectionDAG &DAG) {
3158 SDLoc DL(HiPart);
3159 EVT PtrVT = HiPart.getValueType();
3160 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3161
3162 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3163 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3164
3165 // With PIC, the first instruction is actually "GR+hi(&G)".
3166 if (isPIC)
3167 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3168 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3169
3170 // Generate non-pic code that has direct accesses to the constant pool.
3171 // The address of the global is just (hi(&g)+lo(&g)).
3172 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3173}
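// For example, in non-PIC 32-bit code the Hi/Lo pair built here becomes the
// usual two-instruction materialization, roughly
//   lis  r3, sym@ha
//   addi r3, r3, sym@l
// while in PIC code the PIC base register is added into the high part first.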
3174
3175static void setUsesTOCBasePtr(MachineFunction &MF) {
3176 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3177 FuncInfo->setUsesTOCBasePtr();
3178}
3179
3180static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3181 setUsesTOCBasePtr(DAG.getMachineFunction());
3182}
3183
3184SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3185 SDValue GA) const {
3186 EVT VT = Subtarget.getScalarIntVT();
3187 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3188 : Subtarget.isAIXABI()
3189 ? DAG.getRegister(PPC::R2, VT)
3190 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3191 SDValue Ops[] = { GA, Reg };
3192 return DAG.getMemIntrinsicNode(
3193 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3196}
3197
3198SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3199 SelectionDAG &DAG) const {
3200 EVT PtrVT = Op.getValueType();
3201 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3202 const Constant *C = CP->getConstVal();
3203
3204 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3205 // The actual address of the GlobalValue is stored in the TOC.
3206 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3207 if (Subtarget.isUsingPCRelativeCalls()) {
3208 SDLoc DL(CP);
3209 EVT Ty = getPointerTy(DAG.getDataLayout());
3210 SDValue ConstPool = DAG.getTargetConstantPool(
3211 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3212 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3213 }
3214 setUsesTOCBasePtr(DAG);
3215 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3216 return getTOCEntry(DAG, SDLoc(CP), GA);
3217 }
3218
3219 unsigned MOHiFlag, MOLoFlag;
3220 bool IsPIC = isPositionIndependent();
3221 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3222
3223 if (IsPIC && Subtarget.isSVR4ABI()) {
3224 SDValue GA =
3225 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3226 return getTOCEntry(DAG, SDLoc(CP), GA);
3227 }
3228
3229 SDValue CPIHi =
3230 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3231 SDValue CPILo =
3232 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3233 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3234}
3235
3236// For 64-bit PowerPC, prefer the more compact relative encodings.
3237// This trades 32 bits per jump table entry for one or two instructions
3238// on the jump site.
3239unsigned PPCTargetLowering::getJumpTableEncoding() const {
3240 if (isJumpTableRelative())
3241 return MachineJumpTableInfo::EK_LabelDifference32;
3242
3243 return TargetLowering::getJumpTableEncoding();
3244}
3245
3246bool PPCTargetLowering::isJumpTableRelative() const {
3247 if (UseAbsoluteJumpTables)
3248 return false;
3249 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3250 return true;
3251 return TargetLowering::isJumpTableRelative();
3252}
3253
3254SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3255 SelectionDAG &DAG) const {
3256 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3257 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3258
3259 switch (getTargetMachine().getCodeModel()) {
3260 case CodeModel::Small:
3261 case CodeModel::Medium:
3262 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3263 default:
3264 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3265 getPointerTy(DAG.getDataLayout()));
3266 }
3267}
3268
3269const MCExpr *
3270PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3271 unsigned JTI,
3272 MCContext &Ctx) const {
3273 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3274 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3275
3276 switch (getTargetMachine().getCodeModel()) {
3277 case CodeModel::Small:
3278 case CodeModel::Medium:
3279 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3280 default:
3281 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3282 }
3283}
3284
3285SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3286 EVT PtrVT = Op.getValueType();
3287 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3288
3289 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3290 if (Subtarget.isUsingPCRelativeCalls()) {
3291 SDLoc DL(JT);
3292 EVT Ty = getPointerTy(DAG.getDataLayout());
3293 SDValue GA =
3294 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3295 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3296 return MatAddr;
3297 }
3298
3299 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3300 // The actual address of the GlobalValue is stored in the TOC.
3301 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3302 setUsesTOCBasePtr(DAG);
3303 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3304 return getTOCEntry(DAG, SDLoc(JT), GA);
3305 }
3306
3307 unsigned MOHiFlag, MOLoFlag;
3308 bool IsPIC = isPositionIndependent();
3309 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3310
3311 if (IsPIC && Subtarget.isSVR4ABI()) {
3312 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3313 PPCII::MO_PIC_FLAG);
3314 return getTOCEntry(DAG, SDLoc(GA), GA);
3315 }
3316
3317 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3318 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3319 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3320}
3321
3322SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3323 SelectionDAG &DAG) const {
3324 EVT PtrVT = Op.getValueType();
3325 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3326 const BlockAddress *BA = BASDN->getBlockAddress();
3327
3328 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3329 if (Subtarget.isUsingPCRelativeCalls()) {
3330 SDLoc DL(BASDN);
3331 EVT Ty = getPointerTy(DAG.getDataLayout());
3332 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3333 PPCII::MO_PCREL_FLAG);
3334 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3335 return MatAddr;
3336 }
3337
3338 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3339 // The actual BlockAddress is stored in the TOC.
3340 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3341 setUsesTOCBasePtr(DAG);
3342 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3343 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3344 }
3345
3346 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3347 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3348 return getTOCEntry(
3349 DAG, SDLoc(BASDN),
3350 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3351
3352 unsigned MOHiFlag, MOLoFlag;
3353 bool IsPIC = isPositionIndependent();
3354 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3355 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3356 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3357 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3358}
3359
3360SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3361 SelectionDAG &DAG) const {
3362 if (Subtarget.isAIXABI())
3363 return LowerGlobalTLSAddressAIX(Op, DAG);
3364
3365 return LowerGlobalTLSAddressLinux(Op, DAG);
3366}
3367
3368/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3369/// and then apply the update.
3370static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3371 SelectionDAG &DAG,
3372 const TargetMachine &TM) {
3373 // Initialize TLS model opt setting lazily:
3374 // (1) Use initial-exec for single TLS var references within current function.
3375 // (2) Use local-dynamic for multiple TLS var references within current
3376 // function.
3377 PPCFunctionInfo *FuncInfo =
3378 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3379 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3380 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3381 // Iterate over all instructions within current function, collect all TLS
3382 // global variables (global variables taken as the first parameter to
3383 // Intrinsic::threadlocal_address).
3384 const Function &Func = DAG.getMachineFunction().getFunction();
3385 for (const BasicBlock &BB : Func)
3386 for (const Instruction &I : BB)
3387 if (I.getOpcode() == Instruction::Call)
3388 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3389 if (Function *CF = CI->getCalledFunction())
3390 if (CF->isDeclaration() &&
3391 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3392 if (const GlobalValue *GV =
3393 dyn_cast<GlobalValue>(I.getOperand(0))) {
3394 TLSModel::Model GVModel = TM.getTLSModel(GV);
3395 if (GVModel == TLSModel::LocalDynamic)
3396 TLSGV.insert(GV);
3397 }
3398
3399 unsigned TLSGVCnt = TLSGV.size();
3400 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3401 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3402 FuncInfo->setAIXFuncUseTLSIEForLD();
3403 FuncInfo->setAIXFuncTLSModelOptInitDone();
3404 }
3405
3406 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3407 LLVM_DEBUG(
3408 dbgs() << DAG.getMachineFunction().getName()
3409 << " function is using the TLS-IE model for TLS-LD access.\n");
3410 Model = TLSModel::InitialExec;
3411 }
3412}
3413
3414SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3415 SelectionDAG &DAG) const {
3416 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3417
3418 if (DAG.getTarget().useEmulatedTLS())
3419 report_fatal_error("Emulated TLS is not yet supported on AIX");
3420
3421 SDLoc dl(GA);
3422 const GlobalValue *GV = GA->getGlobal();
3423 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3424 bool Is64Bit = Subtarget.isPPC64();
3425 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3426
3427 // Apply update to the TLS model.
3428 if (Subtarget.hasAIXShLibTLSModelOpt())
3429 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3430
3431 // TLS variables are accessed through TOC entries.
3432 // To support this, set the DAG to use the TOC base pointer.
3433 setUsesTOCBasePtr(DAG);
3434
3435 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3436
3437 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3438 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3439 bool HasAIXSmallTLSGlobalAttr = false;
3440 SDValue VariableOffsetTGA =
3441 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3442 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3443 SDValue TLSReg;
3444
3445 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3446 if (GVar->hasAttribute("aix-small-tls"))
3447 HasAIXSmallTLSGlobalAttr = true;
3448
3449 if (Is64Bit) {
3450 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3451 // involves a load of the variable offset (from the TOC), followed by an
3452 // add of the loaded variable offset to R13 (the thread pointer).
3453 // This code sequence looks like:
3454 // ld reg1,var[TC](2)
3455 // add reg2, reg1, r13 // r13 contains the thread pointer
3456 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3457
3458 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3459 // global variable attribute, produce a faster access sequence for
3460 // local-exec TLS variables where the offset from the TLS base is encoded
3461 // as an immediate operand.
3462 //
3463 // We only utilize the faster local-exec access sequence when the TLS
3464 // variable has a size within the policy limit. We treat types that are
3465 // not sized or are empty as being over the policy size limit.
3466 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3467 IsTLSLocalExecModel) {
3468 Type *GVType = GV->getValueType();
3469 if (GVType->isSized() && !GVType->isEmptyTy() &&
3470 GV->getDataLayout().getTypeAllocSize(GVType) <=
3471 AIXSmallTlsPolicySizeLimit)
3472 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3473 }
3474 } else {
3475 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3476 // involves loading the variable offset from the TOC, generating a call to
3477 // .__get_tpointer to get the thread pointer (which will be in R3), and
3478 // adding the two together:
3479 // lwz reg1,var[TC](2)
3480 // bla .__get_tpointer
3481 // add reg2, reg1, r3
3482 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3483
3484 // We do not implement the 32-bit version of the faster access sequence
3485 // for local-exec that is controlled by the -maix-small-local-exec-tls
3486 // option, or the "aix-small-tls" global variable attribute.
3487 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3488 report_fatal_error("The small-local-exec TLS access sequence is "
3489 "currently only supported on AIX (64-bit mode).");
3490 }
3491 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3492 }
3493
3494 if (Model == TLSModel::LocalDynamic) {
3495 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3496
3497 // We do not implement the 32-bit version of the faster access sequence
3498 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3499 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3500 report_fatal_error("The small-local-dynamic TLS access sequence is "
3501 "currently only supported on AIX (64-bit mode).");
3502
3503 // For local-dynamic on AIX, we need to generate one TOC entry for each
3504 // variable offset, and a single module-handle TOC entry for the entire
3505 // file.
3506
3507 SDValue VariableOffsetTGA =
3508 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3509 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3510
3511 Module *M = DAG.getMachineFunction().getFunction().getParent();
3512 GlobalVariable *TLSGV =
3513 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3514 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3516 assert(TLSGV && "Not able to create GV for _$TLSML.");
3517 SDValue ModuleHandleTGA =
3518 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3519 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3520 SDValue ModuleHandle =
3521 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3522
3523 // With the -maix-small-local-dynamic-tls option, produce a faster access
3524 // sequence for local-dynamic TLS variables where the offset from the
3525 // module-handle is encoded as an immediate operand.
3526 //
3527 // We only utilize the faster local-dynamic access sequence when the TLS
3528 // variable has a size within the policy limit. We treat types that are
3529 // not sized or are empty as being over the policy size limit.
3530 if (HasAIXSmallLocalDynamicTLS) {
3531 Type *GVType = GV->getValueType();
3532 if (GVType->isSized() && !GVType->isEmptyTy() &&
3533 GV->getDataLayout().getTypeAllocSize(GVType) <=
3534 AIXSmallTlsPolicySizeLimit)
3535 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3536 ModuleHandle);
3537 }
3538
3539 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3540 }
3541
3542 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3543 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3544 // need to generate two TOC entries, one for the variable offset, one for the
3545 // region handle. The global address for the TOC entry of the region handle is
3546 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3547 // entry of the variable offset is created with MO_TLSGD_FLAG.
3548 SDValue VariableOffsetTGA =
3549 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3550 SDValue RegionHandleTGA =
3551 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3552 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3553 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3554 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3555 RegionHandle);
3556}
3557
3558SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3559 SelectionDAG &DAG) const {
3560 // FIXME: TLS addresses currently use medium model code sequences,
3561 // which is the most useful form. Eventually support for small and
3562 // large models could be added if users need it, at the cost of
3563 // additional complexity.
3564 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3565 if (DAG.getTarget().useEmulatedTLS())
3566 return LowerToTLSEmulatedModel(GA, DAG);
3567
3568 SDLoc dl(GA);
3569 const GlobalValue *GV = GA->getGlobal();
3570 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3571 bool is64bit = Subtarget.isPPC64();
3572 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3573 PICLevel::Level picLevel = M->getPICLevel();
3574
3575 const TargetMachine &TM = getTargetMachine();
3576 TLSModel::Model Model = TM.getTLSModel(GV);
3577
3578 if (Model == TLSModel::LocalExec) {
3579 if (Subtarget.isUsingPCRelativeCalls()) {
3580 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3581 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3582 PPCII::MO_TPREL_PCREL_FLAG);
3583 SDValue MatAddr =
3584 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3585 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3586 }
3587
3588 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3589 PPCII::MO_TPREL_HA);
3590 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3591 PPCII::MO_TPREL_LO);
3592 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3593 : DAG.getRegister(PPC::R2, MVT::i32);
3594
3595 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3596 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3597 }
3598
3599 if (Model == TLSModel::InitialExec) {
3600 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3601 SDValue TGA = DAG.getTargetGlobalAddress(
3602 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3603 SDValue TGATLS = DAG.getTargetGlobalAddress(
3604 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3605 SDValue TPOffset;
3606 if (IsPCRel) {
3607 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3608 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3609 MachinePointerInfo());
3610 } else {
3611 SDValue GOTPtr;
3612 if (is64bit) {
3613 setUsesTOCBasePtr(DAG);
3614 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3615 GOTPtr =
3616 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3617 } else {
3618 if (!TM.isPositionIndependent())
3619 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3620 else if (picLevel == PICLevel::SmallPIC)
3621 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3622 else
3623 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3624 }
3625 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3626 }
3627 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3628 }
3629
3630 if (Model == TLSModel::GeneralDynamic) {
3631 if (Subtarget.isUsingPCRelativeCalls()) {
3632 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3633 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3634 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3635 }
3636
3637 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3638 SDValue GOTPtr;
3639 if (is64bit) {
3640 setUsesTOCBasePtr(DAG);
3641 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3642 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3643 GOTReg, TGA);
3644 } else {
3645 if (picLevel == PICLevel::SmallPIC)
3646 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3647 else
3648 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3649 }
3650 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3651 GOTPtr, TGA, TGA);
3652 }
3653
3654 if (Model == TLSModel::LocalDynamic) {
3655 if (Subtarget.isUsingPCRelativeCalls()) {
3656 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3657 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3658 SDValue MatPCRel =
3659 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3660 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3661 }
3662
3663 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3664 SDValue GOTPtr;
3665 if (is64bit) {
3666 setUsesTOCBasePtr(DAG);
3667 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3668 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3669 GOTReg, TGA);
3670 } else {
3671 if (picLevel == PICLevel::SmallPIC)
3672 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3673 else
3674 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3675 }
3676 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3677 PtrVT, GOTPtr, TGA, TGA);
3678 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3679 PtrVT, TLSAddr, TGA);
3680 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3681 }
3682
3683 llvm_unreachable("Unknown TLS model!");
3684}
3685
3686SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3687 SelectionDAG &DAG) const {
3688 EVT PtrVT = Op.getValueType();
3689 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3690 SDLoc DL(GSDN);
3691 const GlobalValue *GV = GSDN->getGlobal();
3692
3693 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3694 // The actual address of the GlobalValue is stored in the TOC.
3695 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3696 if (Subtarget.isUsingPCRelativeCalls()) {
3697 EVT Ty = getPointerTy(DAG.getDataLayout());
3698 if (isAccessedAsGotIndirect(Op)) {
3699 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3700 PPCII::MO_GOT_PCREL_FLAG);
3701 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3702 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3703 MachinePointerInfo());
3704 return Load;
3705 } else {
3706 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3707 PPCII::MO_PCREL_FLAG);
3708 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3709 }
3710 }
3711 setUsesTOCBasePtr(DAG);
3712 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3713 return getTOCEntry(DAG, DL, GA);
3714 }
3715
3716 unsigned MOHiFlag, MOLoFlag;
3717 bool IsPIC = isPositionIndependent();
3718 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3719
3720 if (IsPIC && Subtarget.isSVR4ABI()) {
3721 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3722                                             GSDN->getOffset(),
3723                                             PPCII::MO_PIC_FLAG);
3724 return getTOCEntry(DAG, DL, GA);
3725 }
3726
3727 SDValue GAHi =
3728 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3729 SDValue GALo =
3730 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3731
3732 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3733}
3734
3735SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3736 bool IsStrict = Op->isStrictFPOpcode();
3737 ISD::CondCode CC =
3738 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3739 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3740 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3741 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3742 EVT LHSVT = LHS.getValueType();
3743 SDLoc dl(Op);
3744
3745 // Soften the setcc with libcall if it is fp128.
3746 if (LHSVT == MVT::f128) {
3747 assert(!Subtarget.hasP9Vector() &&
3748 "SETCC for f128 is already legal under Power9!");
3749 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3750 Op->getOpcode() == ISD::STRICT_FSETCCS);
3751 if (RHS.getNode())
3752 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3753 DAG.getCondCode(CC));
3754 if (IsStrict)
3755 return DAG.getMergeValues({LHS, Chain}, dl);
3756 return LHS;
3757 }
3758
3759 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3760
3761 if (Op.getValueType() == MVT::v2i64) {
3762 // When the operands themselves are v2i64 values, we need to do something
3763 // special because VSX has no underlying comparison operations for these.
3764 if (LHS.getValueType() == MVT::v2i64) {
3765 // Equality can be handled by casting to the legal type for Altivec
3766 // comparisons, everything else needs to be expanded.
3767 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3768 return SDValue();
3769 SDValue SetCC32 = DAG.getSetCC(
3770 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3771 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
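      // The v4i32 setcc yields a per-word result; swapping the two words of each
      // doubleword and combining with AND (SETEQ) or OR (SETNE) makes every
      // v2i64 lane reflect both of its 32-bit halves.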
3772 int ShuffV[] = {1, 0, 3, 2};
3773 SDValue Shuff =
3774 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3775 return DAG.getBitcast(MVT::v2i64,
3776 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3777 dl, MVT::v4i32, Shuff, SetCC32));
3778 }
3779
3780 // We handle most of these in the usual way.
3781 return Op;
3782 }
3783
3784 // If we're comparing for equality to zero, expose the fact that this is
3785 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3786 // fold the new nodes.
3787 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3788 return V;
3789
3790 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3791 // Leave comparisons against 0 and -1 alone for now, since they're usually
3792 // optimized. FIXME: revisit this when we can custom lower all setcc
3793 // optimizations.
3794 if (C->isAllOnes() || C->isZero())
3795 return SDValue();
3796 }
3797
3798 // If we have an integer seteq/setne, turn it into a compare against zero
3799 // by xor'ing the rhs with the lhs, which is faster than setting a
3800 // condition register, reading it back out, and masking the correct bit. The
3801 // normal approach here uses sub to do this instead of xor. Using xor exposes
3802 // the result to other bit-twiddling opportunities.
3803 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3804 EVT VT = Op.getValueType();
3805 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3806 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3807 }
3808 return SDValue();
3809}
3810
3811SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3812 SDNode *Node = Op.getNode();
3813 EVT VT = Node->getValueType(0);
3814 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3815 SDValue InChain = Node->getOperand(0);
3816 SDValue VAListPtr = Node->getOperand(1);
3817 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3818 SDLoc dl(Node);
3819
3820 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3821
3822 // gpr_index
3823 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3824 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3825 InChain = GprIndex.getValue(1);
3826
3827 if (VT == MVT::i64) {
3828     // Check whether GprIndex is odd; i64 arguments must start at an even index.
3829 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3830 DAG.getConstant(1, dl, MVT::i32));
3831 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3832 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3833 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3834 DAG.getConstant(1, dl, MVT::i32));
3835 // Align GprIndex to be even if it isn't
3836 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3837 GprIndex);
3838 }
3839
3840 // fpr index is 1 byte after gpr
3841 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3842 DAG.getConstant(1, dl, MVT::i32));
3843
3844 // fpr
3845 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3846 FprPtr, MachinePointerInfo(SV), MVT::i8);
3847 InChain = FprIndex.getValue(1);
3848
3849 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3850 DAG.getConstant(8, dl, MVT::i32));
3851
3852 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3853 DAG.getConstant(4, dl, MVT::i32));
3854
3855 // areas
3856 SDValue OverflowArea =
3857 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3858 InChain = OverflowArea.getValue(1);
3859
3860 SDValue RegSaveArea =
3861 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3862 InChain = RegSaveArea.getValue(1);
3863
3864   // select overflow_area if index >= 8
3865 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3866 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3867
3868 // adjustment constant gpr_index * 4/8
3869 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3870 VT.isInteger() ? GprIndex : FprIndex,
3871 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3872 MVT::i32));
3873
3874 // OurReg = RegSaveArea + RegConstant
3875 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3876 RegConstant);
3877
3878 // Floating types are 32 bytes into RegSaveArea
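  // because the eight 4-byte GPR save slots come first.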
3879 if (VT.isFloatingPoint())
3880 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3881 DAG.getConstant(32, dl, MVT::i32));
3882
3883 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3884 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3885 VT.isInteger() ? GprIndex : FprIndex,
3886 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3887 MVT::i32));
3888
3889 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3890 VT.isInteger() ? VAListPtr : FprPtr,
3891 MachinePointerInfo(SV), MVT::i8);
3892
3893 // determine if we should load from reg_save_area or overflow_area
3894 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3895
3896   // increase overflow_area by 4/8 if gpr/fpr index >= 8
3897 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3898 DAG.getConstant(VT.isInteger() ? 4 : 8,
3899 dl, MVT::i32));
3900
3901 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3902 OverflowAreaPlusN);
3903
3904 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3905 MachinePointerInfo(), MVT::i32);
3906
3907 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3908}
3909
3910SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3911 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3912
3913 // We have to copy the entire va_list struct:
3914   // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
3915 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3916 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3917                        false, true, /*CI=*/nullptr, std::nullopt,
3918                        MachinePointerInfo(), MachinePointerInfo());
3919}
3920
3921SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3922 SelectionDAG &DAG) const {
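  // No adjustment is needed on PPC; the trampoline pointer is returned unchanged.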
3923 return Op.getOperand(0);
3924}
3925
3926 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3927   MachineFunction &MF = DAG.getMachineFunction();
3928   PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3929
3930 assert((Op.getOpcode() == ISD::INLINEASM ||
3931 Op.getOpcode() == ISD::INLINEASM_BR) &&
3932 "Expecting Inline ASM node.");
3933
3934   // If an LR store is already known to be required then there is no point in
3935 // checking this ASM as well.
3936 if (MFI.isLRStoreRequired())
3937 return Op;
3938
3939 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3940 // type MVT::Glue. We want to ignore this last operand if that is the case.
3941 unsigned NumOps = Op.getNumOperands();
3942 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3943 --NumOps;
3944
3945 // Check all operands that may contain the LR.
3946 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3947 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3948 unsigned NumVals = Flags.getNumOperandRegisters();
3949 ++i; // Skip the ID value.
3950
3951 switch (Flags.getKind()) {
3952 default:
3953       llvm_unreachable("Bad flags!");
3954     case InlineAsm::Kind::RegUse:
3955     case InlineAsm::Kind::Imm:
3956     case InlineAsm::Kind::Mem:
3957 i += NumVals;
3958       break;
3959     case InlineAsm::Kind::Clobber:
3960     case InlineAsm::Kind::RegDef:
3961     case InlineAsm::Kind::RegDefEarlyClobber: {
3962 for (; NumVals; --NumVals, ++i) {
3963 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3964 if (Reg != PPC::LR && Reg != PPC::LR8)
3965 continue;
3966 MFI.setLRStoreRequired();
3967 return Op;
3968 }
3969 break;
3970 }
3971 }
3972 }
3973
3974 return Op;
3975}
3976
3977SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3978 SelectionDAG &DAG) const {
3979 SDValue Chain = Op.getOperand(0);
3980 SDValue Trmp = Op.getOperand(1); // trampoline
3981 SDValue FPtr = Op.getOperand(2); // nested function
3982 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3983 SDLoc dl(Op);
3984
3985 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3986
3987 if (Subtarget.isAIXABI()) {
3988 // On AIX we create a trampoline descriptor by combining the
3989 // entry point and TOC from the global descriptor (FPtr) with the
3990 // nest argument as the environment pointer.
3991 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3992 MaybeAlign PointerAlign(PointerSize);
3993     auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3994                         ? (MachineMemOperand::MODereferenceable |
3995                            MachineMemOperand::MOInvariant)
3996                         : MachineMemOperand::MONone;
3997
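    // An AIX function descriptor is laid out as { entry point, TOC pointer,
    // environment pointer }, so those fields sit one and two pointers past FPtr.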
3998 uint64_t TOCPointerOffset = 1 * PointerSize;
3999 uint64_t EnvPointerOffset = 2 * PointerSize;
4000 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
4001 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
4002
4003 const Value *TrampolineAddr =
4004 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4005 const Function *Func =
4006 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
4007
4008 SDValue OutChains[3];
4009
4010 // Copy the entry point address from the global descriptor to the
4011 // trampoline buffer.
4012 SDValue LoadEntryPoint =
4013 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
4014 PointerAlign, MMOFlags);
4015 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
4016 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
4017 MachinePointerInfo(TrampolineAddr, 0));
4018
4019 // Copy the TOC pointer from the global descriptor to the trampoline
4020 // buffer.
4021 SDValue TOCFromDescriptorPtr =
4022 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
4023 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
4024 MachinePointerInfo(Func, TOCPointerOffset),
4025 PointerAlign, MMOFlags);
4026 SDValue TrampolineTOCPointer =
4027 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
4028 SDValue TOCLoadChain = TOCReg.getValue(1);
4029 OutChains[1] =
4030 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
4031 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
4032
4033 // Store the nest argument into the environment pointer in the trampoline
4034 // buffer.
4035 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
4036 OutChains[2] =
4037 DAG.getStore(Chain, dl, Nest, EnvPointer,
4038 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
4039
4040     SDValue TokenFactor =
4041 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
4042 return TokenFactor;
4043 }
4044
4045 bool isPPC64 = (PtrVT == MVT::i64);
4046 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
4047
4048   TargetLowering::ArgListTy Args;
4049 Args.emplace_back(Trmp, IntPtrTy);
4050 // TrampSize == (isPPC64 ? 48 : 40);
4051 Args.emplace_back(
4052 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
4053 IntPtrTy);
4054 Args.emplace_back(FPtr, IntPtrTy);
4055 Args.emplace_back(Nest, IntPtrTy);
4056
4057   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4058   TargetLowering::CallLoweringInfo CLI(DAG);
4059   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4060       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4061 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4062
4063 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4064 return CallResult.second;
4065}
4066
4067 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4068   MachineFunction &MF = DAG.getMachineFunction();
4069 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4070 EVT PtrVT = getPointerTy(MF.getDataLayout());
4071
4072 SDLoc dl(Op);
4073
4074 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4075 // vastart just stores the address of the VarArgsFrameIndex slot into the
4076 // memory location argument.
4077 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4078 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4079 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4080 MachinePointerInfo(SV));
4081 }
4082
4083 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4084 // We suppose the given va_list is already allocated.
4085 //
4086 // typedef struct {
4087 // char gpr; /* index into the array of 8 GPRs
4088 // * stored in the register save area
4089 // * gpr=0 corresponds to r3,
4090 // * gpr=1 to r4, etc.
4091 // */
4092 // char fpr; /* index into the array of 8 FPRs
4093 // * stored in the register save area
4094 // * fpr=0 corresponds to f1,
4095 // * fpr=1 to f2, etc.
4096 // */
4097 // char *overflow_arg_area;
4098 // /* location on stack that holds
4099 // * the next overflow argument
4100 // */
4101 // char *reg_save_area;
4102 // /* where r3:r10 and f1:f8 (if saved)
4103 // * are stored
4104 // */
4105 // } va_list[1];
4106
4107 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4108 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4109 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4110 PtrVT);
4111 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4112 PtrVT);
4113
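  // The constants below are the byte distances between successive va_list
  // fields: gpr (offset 0), fpr (1), overflow_arg_area (4), reg_save_area (8).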
4114 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4115 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4116
4117 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4118 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4119
4120 uint64_t FPROffset = 1;
4121 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4122
4123 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4124
4125 // Store first byte : number of int regs
4126 SDValue firstStore =
4127 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4128 MachinePointerInfo(SV), MVT::i8);
4129 uint64_t nextOffset = FPROffset;
4130 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4131 ConstFPROffset);
4132
4133 // Store second byte : number of float regs
4134 SDValue secondStore =
4135 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4136 MachinePointerInfo(SV, nextOffset), MVT::i8);
4137 nextOffset += StackOffset;
4138 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4139
4140 // Store second word : arguments given on stack
4141 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4142 MachinePointerInfo(SV, nextOffset));
4143 nextOffset += FrameOffset;
4144 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4145
4146 // Store third word : arguments given in registers
4147 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4148 MachinePointerInfo(SV, nextOffset));
4149}
4150
4151/// FPR - The set of FP registers that should be allocated for arguments
4152/// on Darwin and AIX.
4153static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4154 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4155 PPC::F11, PPC::F12, PPC::F13};
4156
4157/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4158/// the stack.
4159static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4160 unsigned PtrByteSize) {
4161 unsigned ArgSize = ArgVT.getStoreSize();
4162 if (Flags.isByVal())
4163 ArgSize = Flags.getByValSize();
4164
4165 // Round up to multiples of the pointer size, except for array members,
4166 // which are always packed.
4167 if (!Flags.isInConsecutiveRegs())
4168 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4169
4170 return ArgSize;
4171}
4172
4173/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4174 /// on the stack.
4175 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4176 ISD::ArgFlagsTy Flags,
4177 unsigned PtrByteSize) {
4178 Align Alignment(PtrByteSize);
4179
4180 // Altivec parameters are padded to a 16 byte boundary.
4181 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4182 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4183 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4184 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4185 Alignment = Align(16);
4186
4187 // ByVal parameters are aligned as requested.
4188 if (Flags.isByVal()) {
4189 auto BVAlign = Flags.getNonZeroByValAlign();
4190 if (BVAlign > PtrByteSize) {
4191       if (BVAlign.value() % PtrByteSize != 0)
4192         report_fatal_error(
4193 "ByVal alignment is not a multiple of the pointer size");
4194
4195 Alignment = BVAlign;
4196 }
4197 }
4198
4199 // Array members are always packed to their original alignment.
4200 if (Flags.isInConsecutiveRegs()) {
4201 // If the array member was split into multiple registers, the first
4202 // needs to be aligned to the size of the full type. (Except for
4203 // ppcf128, which is only aligned as its f64 components.)
4204 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4205 Alignment = Align(OrigVT.getStoreSize());
4206 else
4207 Alignment = Align(ArgVT.getStoreSize());
4208 }
4209
4210 return Alignment;
4211}
4212
4213/// CalculateStackSlotUsed - Return whether this argument will use its
4214/// stack slot (instead of being passed in registers). ArgOffset,
4215/// AvailableFPRs, and AvailableVRs must hold the current argument
4216/// position, and will be updated to account for this argument.
4217static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4218 unsigned PtrByteSize, unsigned LinkageSize,
4219 unsigned ParamAreaSize, unsigned &ArgOffset,
4220 unsigned &AvailableFPRs,
4221 unsigned &AvailableVRs) {
4222 bool UseMemory = false;
4223
4224 // Respect alignment of argument on the stack.
4225 Align Alignment =
4226 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4227 ArgOffset = alignTo(ArgOffset, Alignment);
4228 // If there's no space left in the argument save area, we must
4229 // use memory (this check also catches zero-sized arguments).
4230 if (ArgOffset >= LinkageSize + ParamAreaSize)
4231 UseMemory = true;
4232
4233 // Allocate argument on the stack.
4234 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4235 if (Flags.isInConsecutiveRegsLast())
4236 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4237 // If we overran the argument save area, we must use memory
4238 // (this check catches arguments passed partially in memory)
4239 if (ArgOffset > LinkageSize + ParamAreaSize)
4240 UseMemory = true;
4241
4242 // However, if the argument is actually passed in an FPR or a VR,
4243 // we don't use memory after all.
4244 if (!Flags.isByVal()) {
4245 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4246 if (AvailableFPRs > 0) {
4247 --AvailableFPRs;
4248 return false;
4249 }
4250 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4251 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4252 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4253 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4254 if (AvailableVRs > 0) {
4255 --AvailableVRs;
4256 return false;
4257 }
4258 }
4259
4260 return UseMemory;
4261}
4262
4263/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4264 /// ensure minimum alignment required for target.
4265 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4266 unsigned NumBytes) {
4267 return alignTo(NumBytes, Lowering->getStackAlign());
4268}
4269
4270SDValue PPCTargetLowering::LowerFormalArguments(
4271 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4272 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4273 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4274 if (Subtarget.isAIXABI())
4275 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4276 InVals);
4277 if (Subtarget.is64BitELFABI())
4278 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4279 InVals);
4280 assert(Subtarget.is32BitELFABI());
4281 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4282 InVals);
4283}
4284
4285SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4286 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4287 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4288 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4289
4290 // 32-bit SVR4 ABI Stack Frame Layout:
4291 // +-----------------------------------+
4292 // +--> | Back chain |
4293 // | +-----------------------------------+
4294 // | | Floating-point register save area |
4295 // | +-----------------------------------+
4296 // | | General register save area |
4297 // | +-----------------------------------+
4298 // | | CR save word |
4299 // | +-----------------------------------+
4300 // | | VRSAVE save word |
4301 // | +-----------------------------------+
4302 // | | Alignment padding |
4303 // | +-----------------------------------+
4304 // | | Vector register save area |
4305 // | +-----------------------------------+
4306 // | | Local variable space |
4307 // | +-----------------------------------+
4308 // | | Parameter list area |
4309 // | +-----------------------------------+
4310 // | | LR save word |
4311 // | +-----------------------------------+
4312 // SP--> +--- | Back chain |
4313 // +-----------------------------------+
4314 //
4315 // Specifications:
4316 // System V Application Binary Interface PowerPC Processor Supplement
4317 // AltiVec Technology Programming Interface Manual
4318
4319   MachineFunction &MF = DAG.getMachineFunction();
4320 MachineFrameInfo &MFI = MF.getFrameInfo();
4321 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4322
4323 EVT PtrVT = getPointerTy(MF.getDataLayout());
4324 // Potential tail calls could cause overwriting of argument stack slots.
4325 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4326 (CallConv == CallingConv::Fast));
4327 const Align PtrAlign(4);
4328
4329   // Assign locations to all of the incoming arguments.
4330   SmallVector<CCValAssign, 16> ArgLocs;
4331 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4332 *DAG.getContext());
4333
4334 // Reserve space for the linkage area on the stack.
4335 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4336 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4337 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4338
4339 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4340 CCValAssign &VA = ArgLocs[i];
4341
4342 // Arguments stored in registers.
4343 if (VA.isRegLoc()) {
4344 const TargetRegisterClass *RC;
4345 EVT ValVT = VA.getValVT();
4346
4347 switch (ValVT.getSimpleVT().SimpleTy) {
4348 default:
4349 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4350 case MVT::i1:
4351 case MVT::i32:
4352 RC = &PPC::GPRCRegClass;
4353 break;
4354 case MVT::f32:
4355 if (Subtarget.hasP8Vector())
4356 RC = &PPC::VSSRCRegClass;
4357 else if (Subtarget.hasSPE())
4358 RC = &PPC::GPRCRegClass;
4359 else
4360 RC = &PPC::F4RCRegClass;
4361 break;
4362 case MVT::f64:
4363 if (Subtarget.hasVSX())
4364 RC = &PPC::VSFRCRegClass;
4365 else if (Subtarget.hasSPE())
4366 // SPE passes doubles in GPR pairs.
4367 RC = &PPC::GPRCRegClass;
4368 else
4369 RC = &PPC::F8RCRegClass;
4370 break;
4371 case MVT::v16i8:
4372 case MVT::v8i16:
4373 case MVT::v4i32:
4374 RC = &PPC::VRRCRegClass;
4375 break;
4376 case MVT::v4f32:
4377 RC = &PPC::VRRCRegClass;
4378 break;
4379 case MVT::v2f64:
4380 case MVT::v2i64:
4381 RC = &PPC::VRRCRegClass;
4382 break;
4383 }
4384
4385 SDValue ArgValue;
4386 // Transform the arguments stored in physical registers into
4387 // virtual ones.
4388 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4389 assert(i + 1 < e && "No second half of double precision argument");
4390 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4391 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4392 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4393 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4394 if (!Subtarget.isLittleEndian())
4395 std::swap (ArgValueLo, ArgValueHi);
4396 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4397 ArgValueHi);
4398 } else {
4399 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4400 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4401 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4402 if (ValVT == MVT::i1)
4403 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4404 }
4405
4406 InVals.push_back(ArgValue);
4407 } else {
4408 // Argument stored in memory.
4409 assert(VA.isMemLoc());
4410
4411 // Get the extended size of the argument type in stack
4412 unsigned ArgSize = VA.getLocVT().getStoreSize();
4413 // Get the actual size of the argument type
4414 unsigned ObjSize = VA.getValVT().getStoreSize();
4415 unsigned ArgOffset = VA.getLocMemOffset();
4416 // Stack objects in PPC32 are right justified.
4417 ArgOffset += ArgSize - ObjSize;
4418 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4419
4420 // Create load nodes to retrieve arguments from the stack.
4421 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4422 InVals.push_back(
4423 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4424 }
4425 }
4426
4427 // Assign locations to all of the incoming aggregate by value arguments.
4428 // Aggregates passed by value are stored in the local variable space of the
4429 // caller's stack frame, right above the parameter list area.
4430 SmallVector<CCValAssign, 16> ByValArgLocs;
4431 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4432 ByValArgLocs, *DAG.getContext());
4433
4434 // Reserve stack space for the allocations in CCInfo.
4435 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4436
4437 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4438
4439 // Area that is at least reserved in the caller of this function.
4440 unsigned MinReservedArea = CCByValInfo.getStackSize();
4441 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4442
4443 // Set the size that is at least reserved in caller of this function. Tail
4444 // call optimized function's reserved stack space needs to be aligned so that
4445 // taking the difference between two stack areas will result in an aligned
4446 // stack.
4447 MinReservedArea =
4448 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4449 FuncInfo->setMinReservedArea(MinReservedArea);
4450
4451   SmallVector<SDValue, 8> MemOps;
4452
4453 // If the function takes variable number of arguments, make a frame index for
4454 // the start of the first vararg value... for expansion of llvm.va_start.
4455 if (isVarArg) {
4456 static const MCPhysReg GPArgRegs[] = {
4457 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4458 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4459 };
4460 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4461
4462 static const MCPhysReg FPArgRegs[] = {
4463 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4464 PPC::F8
4465 };
4466 unsigned NumFPArgRegs = std::size(FPArgRegs);
4467
4468 if (useSoftFloat() || hasSPE())
4469 NumFPArgRegs = 0;
4470
4471 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4472 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4473
4474 // Make room for NumGPArgRegs and NumFPArgRegs.
4475 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4476 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4477
4478     FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4479 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4480
4481 FuncInfo->setVarArgsFrameIndex(
4482 MFI.CreateStackObject(Depth, Align(8), false));
4483 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4484
4485 // The fixed integer arguments of a variadic function are stored to the
4486 // VarArgsFrameIndex on the stack so that they may be loaded by
4487 // dereferencing the result of va_next.
4488 for (MCPhysReg GPArgReg : GPArgRegs) {
4489 // Get an existing live-in vreg, or add a new one.
4490 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4491 if (!VReg)
4492 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4493
4494 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4495 SDValue Store =
4496 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4497 MemOps.push_back(Store);
4498 // Increment the address by four for the next argument to store
4499 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4500 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4501 }
4502
4503 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4504 // is set.
4505 // The double arguments are stored to the VarArgsFrameIndex
4506 // on the stack.
4507 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4508 // Get an existing live-in vreg, or add a new one.
4509 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4510 if (!VReg)
4511 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4512
4513 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4514 SDValue Store =
4515 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4516 MemOps.push_back(Store);
4517 // Increment the address by eight for the next argument to store
4518 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4519 PtrVT);
4520 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4521 }
4522 }
4523
4524 if (!MemOps.empty())
4525 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4526
4527 return Chain;
4528}
4529
4530// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4531// value to MVT::i64 and then truncate to the correct register size.
4532SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4533 EVT ObjectVT, SelectionDAG &DAG,
4534 SDValue ArgVal,
4535 const SDLoc &dl) const {
4536 if (Flags.isSExt())
4537 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4538 DAG.getValueType(ObjectVT));
4539 else if (Flags.isZExt())
4540 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4541 DAG.getValueType(ObjectVT));
4542
4543 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4544}
4545
4546SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4547 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4548 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4549 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4550 // TODO: add description of PPC stack frame format, or at least some docs.
4551 //
4552 bool isELFv2ABI = Subtarget.isELFv2ABI();
4553   bool isLittleEndian = Subtarget.isLittleEndian();
4554   MachineFunction &MF = DAG.getMachineFunction();
4555 MachineFrameInfo &MFI = MF.getFrameInfo();
4556 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4557
4558 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4559 "fastcc not supported on varargs functions");
4560
4561 EVT PtrVT = getPointerTy(MF.getDataLayout());
4562 // Potential tail calls could cause overwriting of argument stack slots.
4563 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4564 (CallConv == CallingConv::Fast));
4565 unsigned PtrByteSize = 8;
4566 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4567
4568 static const MCPhysReg GPR[] = {
4569 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4570 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4571 };
4572 static const MCPhysReg VR[] = {
4573 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4574 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4575 };
4576
4577 const unsigned Num_GPR_Regs = std::size(GPR);
4578 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4579 const unsigned Num_VR_Regs = std::size(VR);
4580
4581 // Do a first pass over the arguments to determine whether the ABI
4582 // guarantees that our caller has allocated the parameter save area
4583 // on its stack frame. In the ELFv1 ABI, this is always the case;
4584 // in the ELFv2 ABI, it is true if this is a vararg function or if
4585 // any parameter is located in a stack slot.
4586
4587 bool HasParameterArea = !isELFv2ABI || isVarArg;
4588 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4589 unsigned NumBytes = LinkageSize;
4590 unsigned AvailableFPRs = Num_FPR_Regs;
4591 unsigned AvailableVRs = Num_VR_Regs;
4592 for (const ISD::InputArg &In : Ins) {
4593 if (In.Flags.isNest())
4594 continue;
4595
4596 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4597 LinkageSize, ParamAreaSize, NumBytes,
4598 AvailableFPRs, AvailableVRs))
4599 HasParameterArea = true;
4600 }
4601
4602 // Add DAG nodes to load the arguments or copy them out of registers. On
4603 // entry to a function on PPC, the arguments start after the linkage area,
4604 // although the first ones are often in registers.
4605
4606 unsigned ArgOffset = LinkageSize;
4607   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4608   SmallVector<SDValue, 8> MemOps;
4609   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4610 unsigned CurArgIdx = 0;
4611 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4612 SDValue ArgVal;
4613 bool needsLoad = false;
4614 EVT ObjectVT = Ins[ArgNo].VT;
4615 EVT OrigVT = Ins[ArgNo].ArgVT;
4616 unsigned ObjSize = ObjectVT.getStoreSize();
4617 unsigned ArgSize = ObjSize;
4618 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4619 if (Ins[ArgNo].isOrigArg()) {
4620 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4621 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4622 }
4623 // We re-align the argument offset for each argument, except when using the
4624 // fast calling convention, when we need to make sure we do that only when
4625 // we'll actually use a stack slot.
4626 unsigned CurArgOffset;
4627 Align Alignment;
4628 auto ComputeArgOffset = [&]() {
4629 /* Respect alignment of argument on the stack. */
4630 Alignment =
4631 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4632 ArgOffset = alignTo(ArgOffset, Alignment);
4633 CurArgOffset = ArgOffset;
4634 };
4635
4636 if (CallConv != CallingConv::Fast) {
4637 ComputeArgOffset();
4638
4639 /* Compute GPR index associated with argument offset. */
4640 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4641 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4642 }
4643
4644 // FIXME the codegen can be much improved in some cases.
4645 // We do not have to keep everything in memory.
4646 if (Flags.isByVal()) {
4647 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4648
4649 if (CallConv == CallingConv::Fast)
4650 ComputeArgOffset();
4651
4652       // ObjSize is the true size; ArgSize is that rounded up to a register multiple.
4653 ObjSize = Flags.getByValSize();
4654 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4655 // Empty aggregate parameters do not take up registers. Examples:
4656 // struct { } a;
4657 // union { } b;
4658 // int c[0];
4659 // etc. However, we have to provide a place-holder in InVals, so
4660 // pretend we have an 8-byte item at the current address for that
4661 // purpose.
4662 if (!ObjSize) {
4663 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4664 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4665 InVals.push_back(FIN);
4666 continue;
4667 }
4668
4669 // Create a stack object covering all stack doublewords occupied
4670 // by the argument. If the argument is (fully or partially) on
4671 // the stack, or if the argument is fully in registers but the
4672 // caller has allocated the parameter save anyway, we can refer
4673 // directly to the caller's stack frame. Otherwise, create a
4674 // local copy in our own frame.
4675 int FI;
4676 if (HasParameterArea ||
4677 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4678 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4679 else
4680 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4681 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4682
4683 // Handle aggregates smaller than 8 bytes.
4684 if (ObjSize < PtrByteSize) {
4685 // The value of the object is its address, which differs from the
4686 // address of the enclosing doubleword on big-endian systems.
4687 SDValue Arg = FIN;
4688 if (!isLittleEndian) {
4689 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4690 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4691 }
4692 InVals.push_back(Arg);
4693
4694 if (GPR_idx != Num_GPR_Regs) {
4695 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4696 FuncInfo->addLiveInAttr(VReg, Flags);
4697 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4698 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4699 SDValue Store =
4700 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4701 MachinePointerInfo(&*FuncArg), ObjType);
4702 MemOps.push_back(Store);
4703 }
4704 // Whether we copied from a register or not, advance the offset
4705 // into the parameter save area by a full doubleword.
4706 ArgOffset += PtrByteSize;
4707 continue;
4708 }
4709
4710 // The value of the object is its address, which is the address of
4711 // its first stack doubleword.
4712 InVals.push_back(FIN);
4713
4714 // Store whatever pieces of the object are in registers to memory.
4715 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4716 if (GPR_idx == Num_GPR_Regs)
4717 break;
4718
4719 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4720 FuncInfo->addLiveInAttr(VReg, Flags);
4721 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4722 SDValue Addr = FIN;
4723 if (j) {
4724 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4725 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4726 }
4727 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4728 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4729 SDValue Store =
4730 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4731 MachinePointerInfo(&*FuncArg, j), ObjType);
4732 MemOps.push_back(Store);
4733 ++GPR_idx;
4734 }
4735 ArgOffset += ArgSize;
4736 continue;
4737 }
4738
4739 switch (ObjectVT.getSimpleVT().SimpleTy) {
4740 default: llvm_unreachable("Unhandled argument type!");
4741 case MVT::i1:
4742 case MVT::i32:
4743 case MVT::i64:
4744 if (Flags.isNest()) {
4745 // The 'nest' parameter, if any, is passed in R11.
4746 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4747 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4748
4749 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4750 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4751
4752 break;
4753 }
4754
4755 // These can be scalar arguments or elements of an integer array type
4756 // passed directly. Clang may use those instead of "byval" aggregate
4757 // types to avoid forcing arguments to memory unnecessarily.
4758 if (GPR_idx != Num_GPR_Regs) {
4759 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4760 FuncInfo->addLiveInAttr(VReg, Flags);
4761 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4762
4763 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4764 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4765 // value to MVT::i64 and then truncate to the correct register size.
4766 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4767 } else {
4768 if (CallConv == CallingConv::Fast)
4769 ComputeArgOffset();
4770
4771 needsLoad = true;
4772 ArgSize = PtrByteSize;
4773 }
4774 if (CallConv != CallingConv::Fast || needsLoad)
4775 ArgOffset += 8;
4776 break;
4777
4778 case MVT::f32:
4779 case MVT::f64:
4780 // These can be scalar arguments or elements of a float array type
4781       // passed directly. The latter are used to implement ELFv2 homogeneous
4782 // float aggregates.
4783 if (FPR_idx != Num_FPR_Regs) {
4784 unsigned VReg;
4785
4786 if (ObjectVT == MVT::f32)
4787 VReg = MF.addLiveIn(FPR[FPR_idx],
4788 Subtarget.hasP8Vector()
4789 ? &PPC::VSSRCRegClass
4790 : &PPC::F4RCRegClass);
4791 else
4792 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4793 ? &PPC::VSFRCRegClass
4794 : &PPC::F8RCRegClass);
4795
4796 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4797 ++FPR_idx;
4798 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4799 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4800 // once we support fp <-> gpr moves.
4801
4802 // This can only ever happen in the presence of f32 array types,
4803 // since otherwise we never run out of FPRs before running out
4804 // of GPRs.
4805 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4806 FuncInfo->addLiveInAttr(VReg, Flags);
4807 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4808
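        // An f32 passed in a GPR occupies one 32-bit half of the register. When it
        // sits in the high half (offset 0 on BE, offset 4 on LE), shift it down first.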
4809 if (ObjectVT == MVT::f32) {
4810 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4811 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4812 DAG.getConstant(32, dl, MVT::i32));
4813 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4814 }
4815
4816 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4817 } else {
4818 if (CallConv == CallingConv::Fast)
4819 ComputeArgOffset();
4820
4821 needsLoad = true;
4822 }
4823
4824 // When passing an array of floats, the array occupies consecutive
4825 // space in the argument area; only round up to the next doubleword
4826 // at the end of the array. Otherwise, each float takes 8 bytes.
4827 if (CallConv != CallingConv::Fast || needsLoad) {
4828 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4829 ArgOffset += ArgSize;
4830 if (Flags.isInConsecutiveRegsLast())
4831 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4832 }
4833 break;
4834 case MVT::v4f32:
4835 case MVT::v4i32:
4836 case MVT::v8i16:
4837 case MVT::v16i8:
4838 case MVT::v2f64:
4839 case MVT::v2i64:
4840 case MVT::v1i128:
4841 case MVT::f128:
4842 // These can be scalar arguments or elements of a vector array type
4843       // passed directly. The latter are used to implement ELFv2 homogeneous
4844 // vector aggregates.
4845 if (VR_idx != Num_VR_Regs) {
4846 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4847 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4848 ++VR_idx;
4849 } else {
4850 if (CallConv == CallingConv::Fast)
4851 ComputeArgOffset();
4852 needsLoad = true;
4853 }
4854 if (CallConv != CallingConv::Fast || needsLoad)
4855 ArgOffset += 16;
4856 break;
4857 }
4858
4859 // We need to load the argument to a virtual register if we determined
4860 // above that we ran out of physical registers of the appropriate type.
4861 if (needsLoad) {
4862 if (ObjSize < ArgSize && !isLittleEndian)
4863 CurArgOffset += ArgSize - ObjSize;
4864 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4865 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4866 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4867 }
4868
4869 InVals.push_back(ArgVal);
4870 }
4871
4872 // Area that is at least reserved in the caller of this function.
4873 unsigned MinReservedArea;
4874 if (HasParameterArea)
4875 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4876 else
4877 MinReservedArea = LinkageSize;
4878
4879 // Set the size that is at least reserved in caller of this function. Tail
4880 // call optimized functions' reserved stack space needs to be aligned so that
4881 // taking the difference between two stack areas will result in an aligned
4882 // stack.
4883 MinReservedArea =
4884 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4885 FuncInfo->setMinReservedArea(MinReservedArea);
4886
4887 // If the function takes variable number of arguments, make a frame index for
4888 // the start of the first vararg value... for expansion of llvm.va_start.
4889   // The ELFv2 ABI spec states:
4890 // C programs that are intended to be *portable* across different compilers
4891 // and architectures must use the header file <stdarg.h> to deal with variable
4892 // argument lists.
4893 if (isVarArg && MFI.hasVAStart()) {
4894 int Depth = ArgOffset;
4895
4896 FuncInfo->setVarArgsFrameIndex(
4897 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4898 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4899
4900 // If this function is vararg, store any remaining integer argument regs
4901 // to their spots on the stack so that they may be loaded by dereferencing
4902 // the result of va_next.
4903 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4904 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4905 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4906 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4907 SDValue Store =
4908 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4909 MemOps.push_back(Store);
4910       // Increment the address by eight for the next argument to store
4911 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4912 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4913 }
4914 }
4915
4916 if (!MemOps.empty())
4917 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4918
4919 return Chain;
4920}
4921
4922/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4923/// adjusted to accommodate the arguments for the tailcall.
4924static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4925 unsigned ParamSize) {
4926
4927 if (!isTailCall) return 0;
4928
4929   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4930 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4931 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4932 // Remember only if the new adjustment is bigger.
4933 if (SPDiff < FI->getTailCallSPDelta())
4934 FI->setTailCallSPDelta(SPDiff);
4935
4936 return SPDiff;
4937}
4938
4939static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4940
4941static bool callsShareTOCBase(const Function *Caller,
4942 const GlobalValue *CalleeGV,
4943 const TargetMachine &TM) {
4944 // It does not make sense to call callsShareTOCBase() with a caller that
4945 // is PC Relative since PC Relative callers do not have a TOC.
4946#ifndef NDEBUG
4947 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4948 assert(!STICaller->isUsingPCRelativeCalls() &&
4949 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4950#endif
4951
4952 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4953 // don't have enough information to determine if the caller and callee share
4954 // the same TOC base, so we have to pessimistically assume they don't for
4955 // correctness.
4956 if (!CalleeGV)
4957 return false;
4958
4959 // If the callee is preemptable, then the static linker will use a plt-stub
4960 // which saves the toc to the stack, and needs a nop after the call
4961 // instruction to convert to a toc-restore.
4962 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4963 return false;
4964
4965 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4966 // We may need a TOC restore in the situation where the caller requires a
4967 // valid TOC but the callee is PC Relative and does not.
4968 const Function *F = dyn_cast<Function>(CalleeGV);
4969 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4970
4971 // If we have an Alias we can try to get the function from there.
4972 if (Alias) {
4973 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4974 F = dyn_cast<Function>(GlobalObj);
4975 }
4976
4977 // If we still have no valid function pointer we do not have enough
4978 // information to determine if the callee uses PC Relative calls so we must
4979 // assume that it does.
4980 if (!F)
4981 return false;
4982
4983 // If the callee uses PC Relative we cannot guarantee that the callee won't
4984 // clobber the TOC of the caller and so we must assume that the two
4985 // functions do not share a TOC base.
4986 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4987 if (STICallee->isUsingPCRelativeCalls())
4988 return false;
4989
4990 // If the GV is not a strong definition then we need to assume it can be
4991 // replaced by another function at link time. The function that replaces
4992 // it may not share the same TOC as the caller since the callee may be
4993 // replaced by a PC Relative version of the same function.
4994 if (!CalleeGV->isStrongDefinitionForLinker())
4995 return false;
4996
4997 // The medium and large code models are expected to provide a sufficiently
4998 // large TOC to provide all data addressing needs of a module with a
4999 // single TOC.
5000 if (CodeModel::Medium == TM.getCodeModel() ||
5001 CodeModel::Large == TM.getCodeModel())
5002 return true;
5003
5004 // Any explicitly-specified sections and section prefixes must also match.
5005 // Also, if we're using -ffunction-sections, then each function is always in
5006 // a different section (the same is true for COMDAT functions).
5007 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
5008 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
5009 return false;
5010 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
5011 if (F->getSectionPrefix() != Caller->getSectionPrefix())
5012 return false;
5013 }
5014
5015 return true;
5016}
5017
5018 static bool
5019 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
5020 const SmallVectorImpl<ISD::OutputArg> &Outs) {
5021 assert(Subtarget.is64BitELFABI());
5022
5023 const unsigned PtrByteSize = 8;
5024 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5025
5026 static const MCPhysReg GPR[] = {
5027 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5028 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5029 };
5030 static const MCPhysReg VR[] = {
5031 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5032 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5033 };
5034
5035 const unsigned NumGPRs = std::size(GPR);
5036 const unsigned NumFPRs = 13;
5037 const unsigned NumVRs = std::size(VR);
5038 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5039
5040 unsigned NumBytes = LinkageSize;
5041 unsigned AvailableFPRs = NumFPRs;
5042 unsigned AvailableVRs = NumVRs;
5043
5044 for (const ISD::OutputArg& Param : Outs) {
5045 if (Param.Flags.isNest()) continue;
5046
5047 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5048 LinkageSize, ParamAreaSize, NumBytes,
5049 AvailableFPRs, AvailableVRs))
5050 return true;
5051 }
5052 return false;
5053}
5054
5055static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5056 if (CB.arg_size() != CallerFn->arg_size())
5057 return false;
5058
5059 auto CalleeArgIter = CB.arg_begin();
5060 auto CalleeArgEnd = CB.arg_end();
5061 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5062
5063 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5064 const Value* CalleeArg = *CalleeArgIter;
5065 const Value* CallerArg = &(*CallerArgIter);
5066 if (CalleeArg == CallerArg)
5067 continue;
5068
5069 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5070 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5071 // }
5072 // 1st argument of callee is undef and has the same type as caller.
5073 if (CalleeArg->getType() == CallerArg->getType() &&
5074 isa<UndefValue>(CalleeArg))
5075 continue;
5076
5077 return false;
5078 }
5079
5080 return true;
5081}
5082
5083// Returns true if TCO is possible between the callers and callees
5084// calling conventions.
5085 static bool
5086 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5087 CallingConv::ID CalleeCC) {
5088 // Tail calls are possible with fastcc and ccc.
5089 auto isTailCallableCC = [] (CallingConv::ID CC){
5090 return CC == CallingConv::C || CC == CallingConv::Fast;
5091 };
5092 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5093 return false;
5094
5095 // We can safely tail call both fastcc and ccc callees from a c calling
5096 // convention caller. If the caller is fastcc, we may have less stack space
5097 // than a non-fastcc caller with the same signature so disable tail-calls in
5098 // that case.
5099 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5100}
5101
5102bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5103 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5104     CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5105     const SmallVectorImpl<ISD::OutputArg> &Outs,
5106 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5107 bool isCalleeExternalSymbol) const {
5108 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5109
5110 if (DisableSCO && !TailCallOpt) return false;
5111
5112 // Variadic argument functions are not supported.
5113 if (isVarArg) return false;
5114
5115 // Check that the calling conventions are compatible for tco.
5116 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5117 return false;
5118
5119   // A caller with any byval parameter is not supported.
5120 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5121 return false;
5122
5123   // A callee with any byval parameter is not supported either.
5124 // Note: This is a quick work around, because in some cases, e.g.
5125 // caller's stack size > callee's stack size, we are still able to apply
5126 // sibling call optimization. For example, gcc is able to do SCO for caller1
5127 // in the following example, but not for caller2.
5128 // struct test {
5129 // long int a;
5130 // char ary[56];
5131 // } gTest;
5132 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5133 // b->a = v.a;
5134 // return 0;
5135 // }
5136 // void caller1(struct test a, struct test c, struct test *b) {
5137 // callee(gTest, b); }
5138 // void caller2(struct test *b) { callee(gTest, b); }
5139 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5140 return false;
5141
5142 // If callee and caller use different calling conventions, we cannot pass
5143 // parameters on stack since offsets for the parameter area may be different.
5144 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5145 return false;
5146
5147 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5148 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5149 // callee potentially have different TOC bases then we cannot tail call since
5150 // we need to restore the TOC pointer after the call.
5151 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5152 // We cannot guarantee this for indirect calls or calls to external functions.
5153 // When PC-Relative addressing is used, the concept of the TOC is no longer
5154 // applicable so this check is not required.
5155 // Check first for indirect calls.
5156 if (!Subtarget.isUsingPCRelativeCalls() &&
5157 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5158 return false;
5159
5160 // Check if we share the TOC base.
5161 if (!Subtarget.isUsingPCRelativeCalls() &&
5162 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5163 return false;
5164
5165 // TCO allows altering callee ABI, so we don't have to check further.
5166 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5167 return true;
5168
5169 if (DisableSCO) return false;
5170
5171   // If the callee uses the same argument list as the caller, we can apply SCO
5172   // in this case. Otherwise, we need to check whether the callee needs stack
5173   // slots for passing arguments.
5174 // PC Relative tail calls may not have a CallBase.
5175 // If there is no CallBase we cannot verify if we have the same argument
5176 // list so assume that we don't have the same argument list.
5177 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5178 needStackSlotPassParameters(Subtarget, Outs))
5179 return false;
5180 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5181 return false;
5182
5183 return true;
5184}
5185
5186/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5187/// for tail call optimization. Targets which want to do tail call
5188/// optimization should implement this function.
5189bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5190 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5191 CallingConv::ID CallerCC, bool isVarArg,
5192 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5193 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5194 return false;
5195
5196 // Variable argument functions are not supported.
5197 if (isVarArg)
5198 return false;
5199
5200 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5201 // Functions containing byval parameters are not supported.
5202 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5203 return false;
5204
5205 // Non-PIC/GOT tail calls are supported.
5206 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5207 return true;
5208
5209 // At the moment we can only do local tail calls (in same module, hidden
5210 // or protected) if we are generating PIC.
5211 if (CalleeGV)
5212 return CalleeGV->hasHiddenVisibility() ||
5213 CalleeGV->hasProtectedVisibility();
5214 }
5215
5216 return false;
5217}
5218
5219 /// isBLACompatibleAddress - Return the immediate to use if the specified
5220 /// 32-bit value is representable in the immediate field of a BxA instruction.
5221 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5222 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5223 if (!C) return nullptr;
5224
5225 int Addr = C->getZExtValue();
5226 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5227 SignExtend32<26>(Addr) != Addr)
5228 return nullptr; // Top 6 bits have to be sext of immediate.
5229
5230 return DAG
5231 .getConstant(
5232 (int)C->getZExtValue() >> 2, SDLoc(Op),
5233 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5234 .getNode();
5235}
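// A minimal standalone sketch of the check above (illustrative only; the
// helper name is hypothetical, and SignExtend32<26> is assumed to sign-extend
// from bit 25):
//
//   #include <cstdint>
//   static bool fitsInBLAImmediate(uint32_t Addr) {
//     if ((Addr & 3) != 0)                      // low 2 bits must be zero
//       return false;
//     int32_t Sext = (int32_t)(Addr << 6) >> 6; // sign-extend the low 26 bits
//     return (uint32_t)Sext == Addr;            // top 6 bits must match the sign
//   }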
5236
5237namespace {
5238
5239struct TailCallArgumentInfo {
5240 SDValue Arg;
5241 SDValue FrameIdxOp;
5242 int FrameIdx = 0;
5243
5244 TailCallArgumentInfo() = default;
5245};
5246
5247} // end anonymous namespace
5248
5249/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5250 static void StoreTailCallArgumentsToStackSlot(
5251 SelectionDAG &DAG, SDValue Chain,
5252 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5253 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5254 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5255 SDValue Arg = TailCallArgs[i].Arg;
5256 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5257 int FI = TailCallArgs[i].FrameIdx;
5258 // Store relative to framepointer.
5259 MemOpChains.push_back(DAG.getStore(
5260 Chain, dl, Arg, FIN,
5261 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5262 }
5263}
5264
5265/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5266/// the appropriate stack slot for the tail call optimized function call.
5267 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5268 SDValue OldRetAddr, SDValue OldFP,
5269 int SPDiff, const SDLoc &dl) {
5270 if (SPDiff) {
5271 // Calculate the new stack slot for the return address.
5272 MachineFunction &MF = DAG.getMachineFunction();
5273 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5274 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5275 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5276 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5277 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5278 NewRetAddrLoc, true);
5279 SDValue NewRetAddrFrIdx =
5280 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5281 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5282 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5283 }
5284 return Chain;
5285}
5286
5287/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5288/// the position of the argument.
5289 static void CalculateTailCallArgDest(
5290 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5291 int SPDiff, unsigned ArgOffset,
5292 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5293 int Offset = ArgOffset + SPDiff;
5294 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5295 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5296 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5297 SDValue FIN = DAG.getFrameIndex(FI, VT);
5298 TailCallArgumentInfo Info;
5299 Info.Arg = Arg;
5300 Info.FrameIdxOp = FIN;
5301 Info.FrameIdx = FI;
5302 TailCallArguments.push_back(Info);
5303}
5304
5305 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5306 /// address stack slots. Returns the chain as result and the loaded values in
5307 /// LROpOut/FPOpOut. Used when tail calling.
5308SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5309 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5310 SDValue &FPOpOut, const SDLoc &dl) const {
5311 if (SPDiff) {
5312 // Load the LR and FP stack slot for later adjusting.
5313 LROpOut = getReturnAddrFrameIndex(DAG);
5314 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5315 MachinePointerInfo());
5316 Chain = SDValue(LROpOut.getNode(), 1);
5317 }
5318 return Chain;
5319}
5320
5321 /// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
5322 /// specified by "Src" to the address "Dst". The size and alignment are
5323/// specified by the specific parameter attribute. The copy will be passed as
5324/// a byval function parameter.
5325/// Sometimes what we are copying is the end of a larger object, the part that
5326/// does not fit in registers.
5327 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5328 SDValue Chain, ISD::ArgFlagsTy Flags,
5329 SelectionDAG &DAG, const SDLoc &dl) {
5330 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5331 return DAG.getMemcpy(
5332 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5333 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5334}
5335
5336/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5337/// tail calls.
5338 static void LowerMemOpCallTo(
5339 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5340 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5341 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5342 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5343 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5344 if (!isTailCall) {
5345 if (isVector) {
5346 SDValue StackPtr;
5347 if (isPPC64)
5348 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5349 else
5350 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5351 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5352 DAG.getConstant(ArgOffset, dl, PtrVT));
5353 }
5354 MemOpChains.push_back(
5355 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5356 // Calculate and remember argument location.
5357 } else
5358 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5359 TailCallArguments);
5360}
5361
5362static void
5363 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5364 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5365 SDValue FPOp,
5366 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5367 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5368 // might overwrite each other in case of tail call optimization.
5369 SmallVector<SDValue, 8> MemOpChains2;
5370 // Do not flag preceding copytoreg stuff together with the following stuff.
5371 InGlue = SDValue();
5372 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5373 MemOpChains2, dl);
5374 if (!MemOpChains2.empty())
5375 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5376
5377 // Store the return address to the appropriate stack slot.
5378 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5379
5380 // Emit callseq_end just before tailcall node.
5381 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5382 InGlue = Chain.getValue(1);
5383}
5384
5385// Is this global address that of a function that can be called by name? (as
5386// opposed to something that must hold a descriptor for an indirect call).
5387static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5388 if (GV) {
5389 if (GV->isThreadLocal())
5390 return false;
5391
5392 return GV->getValueType()->isFunctionTy();
5393 }
5394
5395 return false;
5396}
5397
5398SDValue PPCTargetLowering::LowerCallResult(
5399 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5400 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5401 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5402 SmallVector<CCValAssign, 16> RVLocs;
5403 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5404 *DAG.getContext());
5405
5406 CCRetInfo.AnalyzeCallResult(
5407 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5408 ? RetCC_PPC_Cold
5409 : RetCC_PPC);
5410
5411 // Copy all of the result registers out of their specified physreg.
5412 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5413 CCValAssign &VA = RVLocs[i];
5414 assert(VA.isRegLoc() && "Can only return in registers!");
5415
5416 SDValue Val;
5417
5418 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5419 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5420 InGlue);
5421 Chain = Lo.getValue(1);
5422 InGlue = Lo.getValue(2);
5423 VA = RVLocs[++i]; // skip ahead to next loc
5424 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5425 InGlue);
5426 Chain = Hi.getValue(1);
5427 InGlue = Hi.getValue(2);
5428 if (!Subtarget.isLittleEndian())
5429 std::swap (Lo, Hi);
5430 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5431 } else {
5432 Val = DAG.getCopyFromReg(Chain, dl,
5433 VA.getLocReg(), VA.getLocVT(), InGlue);
5434 Chain = Val.getValue(1);
5435 InGlue = Val.getValue(2);
5436 }
5437
5438 switch (VA.getLocInfo()) {
5439 default: llvm_unreachable("Unknown loc info!");
5440 case CCValAssign::Full: break;
5441 case CCValAssign::AExt:
5442 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5443 break;
5444 case CCValAssign::ZExt:
5445 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5446 DAG.getValueType(VA.getValVT()));
5447 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5448 break;
5449 case CCValAssign::SExt:
5450 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5451 DAG.getValueType(VA.getValVT()));
5452 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5453 break;
5454 }
5455
5456 InVals.push_back(Val);
5457 }
5458
5459 return Chain;
5460}
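// A standalone sketch of the SPE f64 reassembly performed above (illustrative
// only; buildSPE64 is a hypothetical helper, and which register holds which
// word is determined by the endianness swap in the code above):
//
//   #include <cstdint>
//   #include <cstring>
//   double buildSPE64(uint32_t Hi, uint32_t Lo) {
//     uint64_t Bits = (uint64_t(Hi) << 32) | Lo; // pack the two 32-bit halves
//     double D;
//     std::memcpy(&D, &Bits, sizeof(D));         // reinterpret the bits as f64
//     return D;
//   }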
5461
5462static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5463 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5464 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5465 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5466
5467 // PatchPoint calls are not indirect.
5468 if (isPatchPoint)
5469 return false;
5470
5471 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5472 return false;
5473
5474 // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs cannot
5475 // because the immediate function pointer points to a descriptor instead of
5476 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5477 // pointer immediate points to the global entry point, while the BLA would
5478 // need to jump to the local entry point (see rL211174).
5479 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5480 isBLACompatibleAddress(Callee, DAG))
5481 return false;
5482
5483 return true;
5484}
5485
5486// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5487static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5488 return Subtarget.isAIXABI() ||
5489 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5490}
5491
5492 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5493 const Function &Caller, const SDValue &Callee,
5494 const PPCSubtarget &Subtarget,
5495 const TargetMachine &TM,
5496 bool IsStrictFPCall = false) {
5497 if (CFlags.IsTailCall)
5498 return PPCISD::TC_RETURN;
5499
5500 unsigned RetOpc = 0;
5501 // This is a call through a function pointer.
5502 if (CFlags.IsIndirect) {
5503 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5504 // indirect calls. The save of the caller's TOC pointer to the stack will be
5505 // inserted into the DAG as part of call lowering. The restore of the TOC
5506 // pointer is modeled by using a pseudo instruction for the call opcode that
5507 // represents the 2 instruction sequence of an indirect branch and link,
5508 // immediately followed by a load of the TOC pointer from the stack save
5509 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5510 // as it is not saved or used.
5511 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5512 : PPCISD::BCTRL;
5513 } else if (Subtarget.isUsingPCRelativeCalls()) {
5514 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5515 RetOpc = PPCISD::CALL_NOTOC;
5516 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5517 // The ABIs that maintain a TOC pointer across calls need to have a nop
5518 // immediately following the call instruction if the caller and callee may
5519 // have different TOC bases. At link time, if the linker determines the calls
5520 // may not share a TOC base, the call is redirected to a trampoline inserted
5521 // by the linker. The trampoline will (among other things) save the caller's
5522 // TOC pointer at an ABI designated offset in the linkage area and the
5523 // linker will rewrite the nop to be a load of the TOC pointer from the
5524 // linkage area into gpr2.
5525 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5526 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5527 RetOpc =
5528 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5529 } else
5530 RetOpc = PPCISD::CALL;
5531 if (IsStrictFPCall) {
5532 switch (RetOpc) {
5533 default:
5534 llvm_unreachable("Unknown call opcode");
5535 case PPCISD::BCTRL_LOAD_TOC:
5536 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5537 break;
5538 case PPCISD::BCTRL:
5539 RetOpc = PPCISD::BCTRL_RM;
5540 break;
5541 case PPCISD::CALL_NOTOC:
5542 RetOpc = PPCISD::CALL_NOTOC_RM;
5543 break;
5544 case PPCISD::CALL:
5545 RetOpc = PPCISD::CALL_RM;
5546 break;
5547 case PPCISD::CALL_NOP:
5548 RetOpc = PPCISD::CALL_NOP_RM;
5549 break;
5550 }
5551 }
5552 return RetOpc;
5553}
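// For illustration of the nop convention above (linker behaviour, not code
// produced by this function): a CALL_NOP is emitted as
//
//   bl callee
//   nop                # reserved slot following the call
//
// and if the linker cannot prove that caller and callee share a TOC base, it
// routes the call through a stub and rewrites the nop into a TOC restore such
// as "ld r2, <toc-save-offset>(r1)".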
5554
5555static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5556 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5557 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5558 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5559 return SDValue(Dest, 0);
5560
5561 // Returns true if the callee is local, and false otherwise.
5562 auto isLocalCallee = [&]() {
5563 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5564 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5565
5566 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5567 !isa_and_nonnull<GlobalIFunc>(GV);
5568 };
5569
5570 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5571 // a static relocation model causes some versions of GNU LD (2.17.50, at
5572 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5573 // built with secure-PLT.
5574 bool UsePlt =
5575 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5576 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5577
5578 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5579 const TargetMachine &TM = Subtarget.getTargetMachine();
5580 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5581 auto *S =
5582 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5583
5584 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5585 return DAG.getMCSymbol(S, PtrVT);
5586 };
5587
5588 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5589 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5590 if (isFunctionGlobalAddress(GV)) {
5591 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5592
5593 if (Subtarget.isAIXABI()) {
5594 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5595 return getAIXFuncEntryPointSymbolSDNode(GV);
5596 }
5597 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5598 UsePlt ? PPCII::MO_PLT : 0);
5599 }
5600
5601 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5602 const char *SymName = S->getSymbol();
5603 if (Subtarget.isAIXABI()) {
5604 // If there exists a user-declared function whose name is the same as the
5605 // ExternalSymbol's, then we pick up the user-declared version.
5606 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5607 if (const Function *F =
5608 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5609 return getAIXFuncEntryPointSymbolSDNode(F);
5610
5611 // On AIX, direct function calls reference the symbol for the function's
5612 // entry point, which is named by prepending a "." before the function's
5613 // C-linkage name. A Qualname is returned here because an external
5614 // function entry point is a csect with XTY_ER property.
5615 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5616 auto &Context = DAG.getMachineFunction().getContext();
5617 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5618 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5619 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5620 return Sec->getQualNameSymbol();
5621 };
5622
5623 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5624 }
5625 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5626 UsePlt ? PPCII::MO_PLT : 0);
5627 }
5628
5629 // No transformation needed.
5630 assert(Callee.getNode() && "What no callee?");
5631 return Callee;
5632}
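// For illustration of the AIX naming convention handled above (a sketch, not
// generated code): a C function
//
//   int foo(void);
//
// is referenced through two symbols on AIX: "foo", the function descriptor in
// the data section, and ".foo", the code entry point that direct calls branch
// to; the lambda above builds a reference to the latter.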
5633
5634 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5635 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5636 "Expected a CALLSEQ_STARTSDNode.");
5637
5638 // The last operand is the chain, except when the node has glue. If the node
5639 // has glue, then the last operand is the glue, and the chain is the second
5640 // last operand.
5641 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5642 if (LastValue.getValueType() != MVT::Glue)
5643 return LastValue;
5644
5645 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5646}
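// For illustration, the two result lists the code above distinguishes:
//
//   { MVT::Other }             -> the chain is the last value
//   { MVT::Other, MVT::Glue }  -> the glue is last; the chain is second-to-last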
5647
5648 // Creates the node that moves a function's address into the count register
5649// to prepare for an indirect call instruction.
5650static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5651 SDValue &Glue, SDValue &Chain,
5652 const SDLoc &dl) {
5653 SDValue MTCTROps[] = {Chain, Callee, Glue};
5654 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5655 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5656 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5657 // The glue is the second value produced.
5658 Glue = Chain.getValue(1);
5659}
5660
5661 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5662 SDValue &Glue, SDValue &Chain,
5663 SDValue CallSeqStart,
5664 const CallBase *CB, const SDLoc &dl,
5665 bool hasNest,
5666 const PPCSubtarget &Subtarget) {
5667 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5668 // entry point, but to the function descriptor (the function entry point
5669 // address is part of the function descriptor though).
5670 // The function descriptor is a three doubleword structure with the
5671 // following fields: function entry point, TOC base address and
5672 // environment pointer.
5673 // Thus for a call through a function pointer, the following actions need
5674 // to be performed:
5675 // 1. Save the TOC of the caller in the TOC save area of its stack
5676 // frame (this is done in LowerCall_64SVR4()).
5677 // 2. Load the address of the function entry point from the function
5678 // descriptor.
5679 // 3. Load the TOC of the callee from the function descriptor into r2.
5680 // 4. Load the environment pointer from the function descriptor into
5681 // r11.
5682 // 5. Branch to the function entry point address.
5683 // 6. On return of the callee, the TOC of the caller needs to be
5684 // restored (this is done in FinishCall()).
5685 //
5686 // The loads are scheduled at the beginning of the call sequence, and the
5687 // register copies are flagged together to ensure that no other
5688 // operations can be scheduled in between. E.g. without flagging the
5689 // copies together, a TOC access in the caller could be scheduled between
5690 // the assignment of the callee TOC and the branch to the callee, which leads
5691 // to incorrect code.
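// For illustration of the descriptor layout described above (a sketch; the
// struct and field names are invented for this example):
//
//   struct FunctionDescriptor {
//     void *EntryPoint; // address of the function's first instruction (step 2)
//     void *TOCBase;    // TOC base address, loaded into r2 (step 3)
//     void *EnvPtr;     // environment pointer, loaded into r11 (step 4)
//   };
//
// so an indirect call dereferences the descriptor rather than branching to the
// pointer value itself.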
5692
5693 // Start by loading the function address from the descriptor.
5694 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5695 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5696 ? (MachineMemOperand::MODereferenceable |
5697 MachineMemOperand::MOInvariant)
5698 : MachineMemOperand::MONone;
5699
5700 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5701
5702 // Registers used in building the DAG.
5703 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5704 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5705
5706 // Offsets of descriptor members.
5707 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5708 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5709
5710 const MVT RegVT = Subtarget.getScalarIntVT();
5711 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5712
5713 // One load for the functions entry point address.
5714 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5715 Alignment, MMOFlags);
5716
5717 // One for loading the TOC anchor for the module that contains the called
5718 // function.
5719 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5720 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5721 SDValue TOCPtr =
5722 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5723 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5724
5725 // One for loading the environment pointer.
5726 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5727 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5728 SDValue LoadEnvPtr =
5729 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5730 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5731
5732
5733 // Then copy the newly loaded TOC anchor to the TOC pointer.
5734 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5735 Chain = TOCVal.getValue(0);
5736 Glue = TOCVal.getValue(1);
5737
5738 // If the function call has an explicit 'nest' parameter, it takes the
5739 // place of the environment pointer.
5740 assert((!hasNest || !Subtarget.isAIXABI()) &&
5741 "Nest parameter is not supported on AIX.");
5742 if (!hasNest) {
5743 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5744 Chain = EnvVal.getValue(0);
5745 Glue = EnvVal.getValue(1);
5746 }
5747
5748 // The rest of the indirect call sequence is the same as the non-descriptor
5749 // DAG.
5750 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5751}
5752
5753static void
5754 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5755 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5756 SelectionDAG &DAG,
5757 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5758 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5759 const PPCSubtarget &Subtarget) {
5760 const bool IsPPC64 = Subtarget.isPPC64();
5761 // MVT for a general purpose register.
5762 const MVT RegVT = Subtarget.getScalarIntVT();
5763
5764 // First operand is always the chain.
5765 Ops.push_back(Chain);
5766
5767 // If it's a direct call pass the callee as the second operand.
5768 if (!CFlags.IsIndirect)
5769 Ops.push_back(Callee);
5770 else {
5771 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5772
5773 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5774 // on the stack (this would have been done in `LowerCall_64SVR4` or
5775 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5776 // represents both the indirect branch and a load that restores the TOC
5777 // pointer from the linkage area. The operand for the TOC restore is an add
5778 // of the TOC save offset to the stack pointer. This must be the second
5779 // operand: after the chain input but before any other variadic arguments.
5780 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5781 // saved or used.
5782 if (isTOCSaveRestoreRequired(Subtarget)) {
5783 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5784
5785 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5786 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5787 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5788 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5789 Ops.push_back(AddTOC);
5790 }
5791
5792 // Add the register used for the environment pointer.
5793 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5794 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5795 RegVT));
5796
5797
5798 // Add CTR register as callee so a bctr can be emitted later.
5799 if (CFlags.IsTailCall)
5800 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5801 }
5802
5803 // If this is a tail call add stack pointer delta.
5804 if (CFlags.IsTailCall)
5805 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5806
5807 // Add argument registers to the end of the list so that they are known live
5808 // into the call.
5809 for (const auto &[Reg, N] : RegsToPass)
5810 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5811
5812 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5813 // no way to mark dependencies as implicit here.
5814 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5815 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5816 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5817 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5818
5819 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5820 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5821 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5822
5823 // Add a register mask operand representing the call-preserved registers.
5824 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5825 const uint32_t *Mask =
5826 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5827 assert(Mask && "Missing call preserved mask for calling convention");
5828 Ops.push_back(DAG.getRegisterMask(Mask));
5829
5830 // If the glue is valid, it is the last operand.
5831 if (Glue.getNode())
5832 Ops.push_back(Glue);
5833}
5834
5835SDValue PPCTargetLowering::FinishCall(
5836 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5837 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5838 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5839 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5840 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5841
5842 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5843 Subtarget.isAIXABI())
5844 setUsesTOCBasePtr(DAG);
5845
5846 unsigned CallOpc =
5847 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5848 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5849
5850 if (!CFlags.IsIndirect)
5851 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5852 else if (Subtarget.usesFunctionDescriptors())
5853 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5854 dl, CFlags.HasNest, Subtarget);
5855 else
5856 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5857
5858 // Build the operand list for the call instruction.
5859 SmallVector<SDValue, 8> Ops;
5860 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5861 SPDiff, Subtarget);
5862
5863 // Emit tail call.
5864 if (CFlags.IsTailCall) {
5865 // Indirect tail calls using PC Relative calls do not have the same
5866 // constraints.
5867 assert(((Callee.getOpcode() == ISD::Register &&
5868 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5869 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5870 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5871 isa<ConstantSDNode>(Callee) ||
5872 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5873 "Expecting a global address, external symbol, absolute value, "
5874 "register or an indirect tail call when PC Relative calls are "
5875 "used.");
5876 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5877 assert(CallOpc == PPCISD::TC_RETURN &&
5878 "Unexpected call opcode for a tail call.");
5879 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5880 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5881 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5882 return Ret;
5883 }
5884
5885 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5886 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5887 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5888 Glue = Chain.getValue(1);
5889
5890 // When performing tail call optimization the callee pops its arguments off
5891 // the stack. Account for this here so these bytes can be pushed back on in
5892 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5893 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5894 getTargetMachine().Options.GuaranteedTailCallOpt)
5895 ? NumBytes
5896 : 0;
5897
5898 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5899 Glue = Chain.getValue(1);
5900
5901 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5902 DAG, InVals);
5903}
5904
5905 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5906 CallingConv::ID CalleeCC = CB->getCallingConv();
5907 const Function *CallerFunc = CB->getCaller();
5908 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5909 const Function *CalleeFunc = CB->getCalledFunction();
5910 if (!CalleeFunc)
5911 return false;
5912 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5913
5914 SmallVector<ISD::OutputArg, 2> Outs;
5915 SmallVector<ISD::InputArg, 32> Ins;
5916
5917 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5918 CalleeFunc->getAttributes(), Outs, *this,
5919 CalleeFunc->getDataLayout());
5920
5921 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5922 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5923 false /*isCalleeExternalSymbol*/);
5924}
5925
5926bool PPCTargetLowering::isEligibleForTCO(
5927 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5928 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5929 const SmallVectorImpl<ISD::OutputArg> &Outs,
5930 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5931 bool isCalleeExternalSymbol) const {
5932 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5933 return false;
5934
5935 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5936 return IsEligibleForTailCallOptimization_64SVR4(
5937 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5938 isCalleeExternalSymbol);
5939 else
5940 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5941 isVarArg, Ins);
5942}
5943
5944SDValue
5945PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5946 SmallVectorImpl<SDValue> &InVals) const {
5947 SelectionDAG &DAG = CLI.DAG;
5948 SDLoc &dl = CLI.DL;
5949 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5950 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5951 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5952 SDValue Chain = CLI.Chain;
5953 SDValue Callee = CLI.Callee;
5954 bool &isTailCall = CLI.IsTailCall;
5955 CallingConv::ID CallConv = CLI.CallConv;
5956 bool isVarArg = CLI.IsVarArg;
5957 bool isPatchPoint = CLI.IsPatchPoint;
5958 const CallBase *CB = CLI.CB;
5959
5960 if (isTailCall) {
5961 MachineFunction &MF = DAG.getMachineFunction();
5962 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5963 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5964 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5965 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5966
5967 isTailCall =
5968 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5969 &(MF.getFunction()), IsCalleeExternalSymbol);
5970 if (isTailCall) {
5971 ++NumTailCalls;
5972 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5973 ++NumSiblingCalls;
5974
5975 // PC Relative calls no longer guarantee that the callee is a Global
5976 // Address Node. The callee could be an indirect tail call in which
5977 // case the SDValue for the callee could be a load (to load the address
5978 // of a function pointer) or it may be a register copy (to move the
5979 // address of the callee from a function parameter into a virtual
5980 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5981 assert((Subtarget.isUsingPCRelativeCalls() ||
5982 isa<GlobalAddressSDNode>(Callee)) &&
5983 "Callee should be an llvm::Function object.");
5984
5985 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5986 << "\nTCO callee: ");
5987 LLVM_DEBUG(Callee.dump());
5988 }
5989 }
5990
5991 if (!isTailCall && CB && CB->isMustTailCall())
5992 report_fatal_error("failed to perform tail call elimination on a call "
5993 "site marked musttail");
5994
5995 // When long calls (i.e. indirect calls) are always used, calls are always
5996 // made via function pointer. If we have a function name, first translate it
5997 // into a pointer.
5998 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5999 !isTailCall)
6000 Callee = LowerGlobalAddress(Callee, DAG);
6001
6002 CallFlags CFlags(
6003 CallConv, isTailCall, isVarArg, isPatchPoint,
6004 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
6005 // hasNest
6006 Subtarget.is64BitELFABI() &&
6007 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
6008 CLI.NoMerge);
6009
6010 if (Subtarget.isAIXABI())
6011 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6012 InVals, CB);
6013
6014 assert(Subtarget.isSVR4ABI());
6015 if (Subtarget.isPPC64())
6016 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6017 InVals, CB);
6018 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6019 InVals, CB);
6020}
6021
6022SDValue PPCTargetLowering::LowerCall_32SVR4(
6023 SDValue Chain, SDValue Callee, CallFlags CFlags,
6024 const SmallVectorImpl<ISD::OutputArg> &Outs,
6025 const SmallVectorImpl<SDValue> &OutVals,
6026 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6027 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6028 const CallBase *CB) const {
6029 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
6030 // of the 32-bit SVR4 ABI stack frame layout.
6031
6032 const CallingConv::ID CallConv = CFlags.CallConv;
6033 const bool IsVarArg = CFlags.IsVarArg;
6034 const bool IsTailCall = CFlags.IsTailCall;
6035
6036 assert((CallConv == CallingConv::C ||
6037 CallConv == CallingConv::Cold ||
6038 CallConv == CallingConv::Fast) && "Unknown calling convention!");
6039
6040 const Align PtrAlign(4);
6041
6042 MachineFunction &MF = DAG.getMachineFunction();
6043
6044 // Mark this function as potentially containing a function that contains a
6045 // tail call. As a consequence, the frame pointer will be used for dynamic
6046 // alloca and for restoring the caller's stack pointer in this function's
6047 // epilog. This is done because, by tail calling, the called function might
6048 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6049 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6050 CallConv == CallingConv::Fast)
6051 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6052
6053 // Count how many bytes are to be pushed on the stack, including the linkage
6054 // area, parameter list area and the part of the local variable space which
6055 // contains copies of aggregates which are passed by value.
6056
6057 // Assign locations to all of the outgoing arguments.
6058 SmallVector<CCValAssign, 16> ArgLocs;
6059 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6060
6061 // Reserve space for the linkage area on the stack.
6062 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6063 PtrAlign);
6064
6065 if (IsVarArg) {
6066 // Handle fixed and variable vector arguments differently.
6067 // Fixed vector arguments go into registers as long as registers are
6068 // available. Variable vector arguments always go into memory.
6069 unsigned NumArgs = Outs.size();
6070
6071 for (unsigned i = 0; i != NumArgs; ++i) {
6072 MVT ArgVT = Outs[i].VT;
6073 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6074 bool Result;
6075
6076 if (!ArgFlags.isVarArg()) {
6077 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6078 Outs[i].OrigTy, CCInfo);
6079 } else {
6080 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6081 ArgFlags, Outs[i].OrigTy, CCInfo);
6082 }
6083
6084 if (Result) {
6085#ifndef NDEBUG
6086 errs() << "Call operand #" << i << " has unhandled type "
6087 << ArgVT << "\n";
6088#endif
6089 llvm_unreachable(nullptr);
6090 }
6091 }
6092 } else {
6093 // All arguments are treated the same.
6094 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6095 }
6096
6097 // Assign locations to all of the outgoing aggregate by value arguments.
6098 SmallVector<CCValAssign, 16> ByValArgLocs;
6099 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6100
6101 // Reserve stack space for the allocations in CCInfo.
6102 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6103
6104 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6105
6106 // Size of the linkage area, parameter list area and the part of the local
6107 // variable space where copies of aggregates which are passed by value are
6108 // stored.
6109 unsigned NumBytes = CCByValInfo.getStackSize();
6110
6111 // Calculate by how many bytes the stack has to be adjusted in case of tail
6112 // call optimization.
6113 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6114
6115 // Adjust the stack pointer for the new arguments...
6116 // These operations are automatically eliminated by the prolog/epilog pass
6117 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6118 SDValue CallSeqStart = Chain;
6119
6120 // Load the return address and frame pointer so it can be moved somewhere else
6121 // later.
6122 SDValue LROp, FPOp;
6123 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6124
6125 // Set up a copy of the stack pointer for use loading and storing any
6126 // arguments that may not fit in the registers available for argument
6127 // passing.
6128 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6129
6130 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6131 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6132 SmallVector<SDValue, 8> MemOpChains;
6133
6134 bool seenFloatArg = false;
6135 // Walk the register/memloc assignments, inserting copies/loads.
6136 // i - Tracks the index into the list of registers allocated for the call
6137 // RealArgIdx - Tracks the index into the list of actual function arguments
6138 // j - Tracks the index into the list of byval arguments
6139 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6140 i != e;
6141 ++i, ++RealArgIdx) {
6142 CCValAssign &VA = ArgLocs[i];
6143 SDValue Arg = OutVals[RealArgIdx];
6144 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6145
6146 if (Flags.isByVal()) {
6147 // Argument is an aggregate which is passed by value, thus we need to
6148 // create a copy of it in the local variable space of the current stack
6149 // frame (which is the stack frame of the caller) and pass the address of
6150 // this copy to the callee.
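// For illustration (a C-level sketch, not tied to this lowering): for
//
//   struct S { int x[4]; };
//   void callee(struct S s);   // lowered with a "byval" pointer argument
//
// the caller materializes a private copy of the aggregate in its own frame and
// passes the address of that copy, so the callee may modify its parameter
// without affecting the caller's object.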
6151 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6152 CCValAssign &ByValVA = ByValArgLocs[j++];
6153 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6154
6155 // Memory reserved in the local variable space of the callers stack frame.
6156 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6157
6158 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6159 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6160 StackPtr, PtrOff);
6161
6162 // Create a copy of the argument in the local area of the current
6163 // stack frame.
6164 SDValue MemcpyCall =
6165 CreateCopyOfByValArgument(Arg, PtrOff,
6166 CallSeqStart.getNode()->getOperand(0),
6167 Flags, DAG, dl);
6168
6169 // This must go outside the CALLSEQ_START..END.
6170 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6171 SDLoc(MemcpyCall));
6172 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6173 NewCallSeqStart.getNode());
6174 Chain = CallSeqStart = NewCallSeqStart;
6175
6176 // Pass the address of the aggregate copy on the stack either in a
6177 // physical register or in the parameter list area of the current stack
6178 // frame to the callee.
6179 Arg = PtrOff;
6180 }
6181
6182 // When useCRBits() is true, there can be i1 arguments.
6183 // It is because getRegisterType(MVT::i1) => MVT::i1,
6184 // and for other integer types getRegisterType() => MVT::i32.
6185 // Extend i1 and ensure callee will get i32.
6186 if (Arg.getValueType() == MVT::i1)
6187 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6188 dl, MVT::i32, Arg);
6189
6190 if (VA.isRegLoc()) {
6191 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6192 // Put argument in a physical register.
6193 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6194 bool IsLE = Subtarget.isLittleEndian();
6195 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6196 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6197 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6198 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6199 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6200 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6201 SVal.getValue(0)));
6202 } else
6203 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6204 } else {
6205 // Put argument in the parameter list area of the current stack frame.
6206 assert(VA.isMemLoc());
6207 unsigned LocMemOffset = VA.getLocMemOffset();
6208
6209 if (!IsTailCall) {
6210 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6211 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6212 StackPtr, PtrOff);
6213
6214 MemOpChains.push_back(
6215 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6216 } else {
6217 // Calculate and remember argument location.
6218 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6219 TailCallArguments);
6220 }
6221 }
6222 }
6223
6224 if (!MemOpChains.empty())
6225 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6226
6227 // Build a sequence of copy-to-reg nodes chained together with token chain
6228 // and flag operands which copy the outgoing args into the appropriate regs.
6229 SDValue InGlue;
6230 for (const auto &[Reg, N] : RegsToPass) {
6231 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6232 InGlue = Chain.getValue(1);
6233 }
6234
6235 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6236 // registers.
6237 if (IsVarArg) {
6238 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6239 SDValue Ops[] = { Chain, InGlue };
6240
6241 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6242 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6243
6244 InGlue = Chain.getValue(1);
6245 }
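// For illustration (an ABI convention, not code specific to this function): a
// vararg call such as printf("%f\n", 1.0) passes the double in an FPR, so CR
// bit 6 is set and the callee's va_arg machinery knows the FP argument
// registers must be spilled; a call with no FP register arguments clears it.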
6246
6247 if (IsTailCall)
6248 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6249 TailCallArguments);
6250
6251 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6252 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6253}
6254
6255// Copy an argument into memory, being careful to do this outside the
6256// call sequence for the call to which the argument belongs.
6257SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6258 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6259 SelectionDAG &DAG, const SDLoc &dl) const {
6260 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6261 CallSeqStart.getNode()->getOperand(0),
6262 Flags, DAG, dl);
6263 // The MEMCPY must go outside the CALLSEQ_START..END.
6264 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6265 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6266 SDLoc(MemcpyCall));
6267 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6268 NewCallSeqStart.getNode());
6269 return NewCallSeqStart;
6270}
6271
6272SDValue PPCTargetLowering::LowerCall_64SVR4(
6273 SDValue Chain, SDValue Callee, CallFlags CFlags,
6275 const SmallVectorImpl<SDValue> &OutVals,
6276 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6278 const CallBase *CB) const {
6279 bool isELFv2ABI = Subtarget.isELFv2ABI();
6280 bool isLittleEndian = Subtarget.isLittleEndian();
6281 unsigned NumOps = Outs.size();
6282 bool IsSibCall = false;
6283 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6284
6285 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6286 unsigned PtrByteSize = 8;
6287
6288 MachineFunction &MF = DAG.getMachineFunction();
6289
6290 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6291 IsSibCall = true;
6292
6293 // Mark this function as potentially containing a function that contains a
6294 // tail call. As a consequence, the frame pointer will be used for dynamic
6295 // alloca and for restoring the caller's stack pointer in this function's
6296 // epilog. This is done because, by tail calling, the called function might
6297 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6298 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6299 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6300
6301 assert(!(IsFastCall && CFlags.IsVarArg) &&
6302 "fastcc not supported on varargs functions");
6303
6304 // Count how many bytes are to be pushed on the stack, including the linkage
6305 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6306 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6307 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
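// For illustration, the corresponding doubleword offsets from the stack
// pointer (a sketch of the standard layouts summarized above):
//
//   ELFv1 (48 bytes): 0 back chain (SP), 8 CR save, 16 LR save,
//                     24 and 32 reserved, 40 TOC save
//   ELFv2 (32 bytes): 0 back chain (SP), 8 CR save, 16 LR save, 24 TOC save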
6308 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6309 unsigned NumBytes = LinkageSize;
6310 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6311
6312 static const MCPhysReg GPR[] = {
6313 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6314 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6315 };
6316 static const MCPhysReg VR[] = {
6317 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6318 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6319 };
6320
6321 const unsigned NumGPRs = std::size(GPR);
6322 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6323 const unsigned NumVRs = std::size(VR);
6324
6325 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6326 // can be passed to the callee in registers.
6327 // For the fast calling convention, there is another check below.
6328 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
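// For illustration of the register budget used below (assuming the GPR/VR
// tables above and the 13 FPRs): a call passing five integers and two doubles
// fits entirely in r3-r7 and f1-f2, so on ELFv2 no parameter save area is
// needed; a call passing nine integers spills the ninth to memory, which
// forces the parameter area (with the 8-doubleword minimum enforced further
// below) to be allocated.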
6329 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6330 if (!HasParameterArea) {
6331 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6332 unsigned AvailableFPRs = NumFPRs;
6333 unsigned AvailableVRs = NumVRs;
6334 unsigned NumBytesTmp = NumBytes;
6335 for (unsigned i = 0; i != NumOps; ++i) {
6336 if (Outs[i].Flags.isNest()) continue;
6337 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6338 PtrByteSize, LinkageSize, ParamAreaSize,
6339 NumBytesTmp, AvailableFPRs, AvailableVRs))
6340 HasParameterArea = true;
6341 }
6342 }
6343
6344 // When using the fast calling convention, we don't provide backing for
6345 // arguments that will be in registers.
6346 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6347
6348 // Avoid allocating parameter area for fastcc functions if all the arguments
6349 // can be passed in the registers.
6350 if (IsFastCall)
6351 HasParameterArea = false;
6352
6353 // Add up all the space actually used.
6354 for (unsigned i = 0; i != NumOps; ++i) {
6355 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6356 EVT ArgVT = Outs[i].VT;
6357 EVT OrigVT = Outs[i].ArgVT;
6358
6359 if (Flags.isNest())
6360 continue;
6361
6362 if (IsFastCall) {
6363 if (Flags.isByVal()) {
6364 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6365 if (NumGPRsUsed > NumGPRs)
6366 HasParameterArea = true;
6367 } else {
6368 switch (ArgVT.getSimpleVT().SimpleTy) {
6369 default: llvm_unreachable("Unexpected ValueType for argument!");
6370 case MVT::i1:
6371 case MVT::i32:
6372 case MVT::i64:
6373 if (++NumGPRsUsed <= NumGPRs)
6374 continue;
6375 break;
6376 case MVT::v4i32:
6377 case MVT::v8i16:
6378 case MVT::v16i8:
6379 case MVT::v2f64:
6380 case MVT::v2i64:
6381 case MVT::v1i128:
6382 case MVT::f128:
6383 if (++NumVRsUsed <= NumVRs)
6384 continue;
6385 break;
6386 case MVT::v4f32:
6387 if (++NumVRsUsed <= NumVRs)
6388 continue;
6389 break;
6390 case MVT::f32:
6391 case MVT::f64:
6392 if (++NumFPRsUsed <= NumFPRs)
6393 continue;
6394 break;
6395 }
6396 HasParameterArea = true;
6397 }
6398 }
6399
6400 /* Respect alignment of argument on the stack. */
6401 auto Alignment =
6402 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6403 NumBytes = alignTo(NumBytes, Alignment);
6404
6405 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6406 if (Flags.isInConsecutiveRegsLast())
6407 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6408 }
6409
6410 unsigned NumBytesActuallyUsed = NumBytes;
6411
6412 // In the old ELFv1 ABI,
6413 // the prolog code of the callee may store up to 8 GPR argument registers to
6414 // the stack, allowing va_start to index over them in memory if it is varargs.
6415 // Because we cannot tell if this is needed on the caller side, we have to
6416 // conservatively assume that it is needed. As such, make sure we have at
6417 // least enough stack space for the caller to store the 8 GPRs.
6418 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6419 // really requires memory operands, e.g. a vararg function.
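// As a worked example of the floor applied below (assuming the 48-byte ELFv1
// linkage area): whenever a parameter area is required, at least
// 48 + 8 * 8 = 112 bytes are reserved, so the callee can always spill r3-r10
// for va_start even if the caller passed everything in registers.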
6420 if (HasParameterArea)
6421 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6422 else
6423 NumBytes = LinkageSize;
6424
6425 // Tail call needs the stack to be aligned.
6426 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6427 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6428
6429 int SPDiff = 0;
6430
6431 // Calculate by how many bytes the stack has to be adjusted in case of tail
6432 // call optimization.
6433 if (!IsSibCall)
6434 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6435
6436 // To protect arguments on the stack from being clobbered in a tail call,
6437 // force all the loads to happen before doing any other lowering.
6438 if (CFlags.IsTailCall)
6439 Chain = DAG.getStackArgumentTokenFactor(Chain);
6440
6441 // Adjust the stack pointer for the new arguments...
6442 // These operations are automatically eliminated by the prolog/epilog pass
6443 if (!IsSibCall)
6444 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6445 SDValue CallSeqStart = Chain;
6446
6447 // Load the return address and frame pointer so they can be moved somewhere
6448 // else later.
6449 SDValue LROp, FPOp;
6450 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6451
6452 // Set up a copy of the stack pointer for use loading and storing any
6453 // arguments that may not fit in the registers available for argument
6454 // passing.
6455 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6456
6457 // Figure out which arguments are going to go in registers, and which in
6458 // memory. Also, if this is a vararg function, floating point operations
6459 // must be stored to our stack, and loaded into integer regs as well, if
6460 // any integer regs are available for argument passing.
6461 unsigned ArgOffset = LinkageSize;
6462
6464 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6465
6466 SmallVector<SDValue, 8> MemOpChains;
6467 for (unsigned i = 0; i != NumOps; ++i) {
6468 SDValue Arg = OutVals[i];
6469 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6470 EVT ArgVT = Outs[i].VT;
6471 EVT OrigVT = Outs[i].ArgVT;
6472
6473 // PtrOff will be used to store the current argument to the stack if a
6474 // register cannot be found for it.
6475 SDValue PtrOff;
6476
6477 // We re-align the argument offset for each argument, except when using the
6478 // fast calling convention, when we need to make sure we do that only when
6479 // we'll actually use a stack slot.
6480 auto ComputePtrOff = [&]() {
6481 /* Respect alignment of argument on the stack. */
6482 auto Alignment =
6483 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6484 ArgOffset = alignTo(ArgOffset, Alignment);
6485
6486 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6487
6488 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6489 };
6490
6491 if (!IsFastCall) {
6492 ComputePtrOff();
6493
6494 /* Compute GPR index associated with argument offset. */
6495 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6496 GPR_idx = std::min(GPR_idx, NumGPRs);
6497 }
6498
6499 // Promote integers to 64-bit values.
6500 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6501 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6502 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6503 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6504 }
6505
6506 // FIXME memcpy is used way more than necessary. Correctness first.
6507 // Note: "by value" is code for passing a structure by value, not
6508 // basic types.
6509 if (Flags.isByVal()) {
6510 // Note: Size includes alignment padding, so
6511 // struct x { short a; char b; }
6512 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6513 // These are the proper values we need for right-justifying the
6514 // aggregate in a parameter register.
6515 unsigned Size = Flags.getByValSize();
6516
6517 // An empty aggregate parameter takes up no storage and no
6518 // registers.
6519 if (Size == 0)
6520 continue;
6521
6522 if (IsFastCall)
6523 ComputePtrOff();
6524
6525 // All aggregates smaller than 8 bytes must be passed right-justified.
6526 if (Size==1 || Size==2 || Size==4) {
6527 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6528 if (GPR_idx != NumGPRs) {
6529 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6530 MachinePointerInfo(), VT);
6531 MemOpChains.push_back(Load.getValue(1));
6532 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6533
6534 ArgOffset += PtrByteSize;
6535 continue;
6536 }
6537 }
6538
6539 if (GPR_idx == NumGPRs && Size < 8) {
6540 SDValue AddPtr = PtrOff;
6541 if (!isLittleEndian) {
6542 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6543 PtrOff.getValueType());
6544 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6545 }
6546 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6547 CallSeqStart,
6548 Flags, DAG, dl);
6549 ArgOffset += PtrByteSize;
6550 continue;
6551 }
6552 // Copy the object to the parameter save area if it cannot be entirely
6553 // passed in registers.
6554 // FIXME: we only need to copy the parts which need to be passed in
6555 // parameter save area. For the parts passed by registers, we don't need
6556 // to copy them to the stack although we need to allocate space for them
6557 // in parameter save area.
6558 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6559 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6560 CallSeqStart,
6561 Flags, DAG, dl);
6562
6563 // When a register is available, pass a small aggregate right-justified.
6564 if (Size < 8 && GPR_idx != NumGPRs) {
6565 // The easiest way to get this right-justified in a register
6566 // is to copy the structure into the rightmost portion of a
6567 // local variable slot, then load the whole slot into the
6568 // register.
6569 // FIXME: The memcpy seems to produce pretty awful code for
6570 // small aggregates, particularly for packed ones.
6571 // FIXME: It would be preferable to use the slot in the
6572 // parameter save area instead of a new local variable.
6573 SDValue AddPtr = PtrOff;
6574 if (!isLittleEndian) {
6575 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6576 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6577 }
6578 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6579 CallSeqStart,
6580 Flags, DAG, dl);
6581
6582 // Load the slot into the register.
6583 SDValue Load =
6584 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6585 MemOpChains.push_back(Load.getValue(1));
6586 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6587
6588 // Done with this argument.
6589 ArgOffset += PtrByteSize;
6590 continue;
6591 }
6592
6593 // For aggregates larger than PtrByteSize, copy the pieces of the
6594 // object that fit into registers from the parameter save area.
6595 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6596 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6597 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6598 if (GPR_idx != NumGPRs) {
6599 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6600 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6601 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6602 MachinePointerInfo(), ObjType);
6603
6604 MemOpChains.push_back(Load.getValue(1));
6605 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6606 ArgOffset += PtrByteSize;
6607 } else {
6608 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6609 break;
6610 }
6611 }
6612 continue;
6613 }
6614
6615 switch (Arg.getSimpleValueType().SimpleTy) {
6616 default: llvm_unreachable("Unexpected ValueType for argument!");
6617 case MVT::i1:
6618 case MVT::i32:
6619 case MVT::i64:
6620 if (Flags.isNest()) {
6621 // The 'nest' parameter, if any, is passed in R11.
6622 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6623 break;
6624 }
6625
6626 // These can be scalar arguments or elements of an integer array type
6627 // passed directly. Clang may use those instead of "byval" aggregate
6628 // types to avoid forcing arguments to memory unnecessarily.
6629 if (GPR_idx != NumGPRs) {
6630 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6631 } else {
6632 if (IsFastCall)
6633 ComputePtrOff();
6634
6635 assert(HasParameterArea &&
6636 "Parameter area must exist to pass an argument in memory.");
6637 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6638 true, CFlags.IsTailCall, false, MemOpChains,
6639 TailCallArguments, dl);
6640 if (IsFastCall)
6641 ArgOffset += PtrByteSize;
6642 }
6643 if (!IsFastCall)
6644 ArgOffset += PtrByteSize;
6645 break;
6646 case MVT::f32:
6647 case MVT::f64: {
6648 // These can be scalar arguments or elements of a float array type
6650 // passed directly. The latter are used to implement ELFv2 homogeneous
6650 // float aggregates.
6651
6652 // Named arguments go into FPRs first, and once they overflow, the
6653 // remaining arguments go into GPRs and then the parameter save area.
6654 // Unnamed arguments for vararg functions always go to GPRs and
6655 // then the parameter save area. For now, arguments to vararg routines
6656 // are always placed in both locations (FPR *and* GPR or stack slot).
6657 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6658 bool NeededLoad = false;
6659
6660 // First load the argument into the next available FPR.
6661 if (FPR_idx != NumFPRs)
6662 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6663
6664 // Next, load the argument into GPR or stack slot if needed.
6665 if (!NeedGPROrStack)
6666 ;
6667 else if (GPR_idx != NumGPRs && !IsFastCall) {
6668 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6669 // once we support fp <-> gpr moves.
6670
6671 // In the non-vararg case, this can only ever happen in the
6672 // presence of f32 array types, since otherwise we never run
6673 // out of FPRs before running out of GPRs.
6674 SDValue ArgVal;
6675
6676 // Double values are always passed in a single GPR.
6677 if (Arg.getValueType() != MVT::f32) {
6678 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6679
6680 // Non-array float values are extended and passed in a GPR.
6681 } else if (!Flags.isInConsecutiveRegs()) {
6682 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6683 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6684
6685 // If we have an array of floats, we collect every odd element
6686 // together with its predecessor into one GPR.
6687 } else if (ArgOffset % PtrByteSize != 0) {
6688 SDValue Lo, Hi;
6689 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6690 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6691 if (!isLittleEndian)
6692 std::swap(Lo, Hi);
6693 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6694
6695 // The final element, if even, goes into the first half of a GPR.
6696 } else if (Flags.isInConsecutiveRegsLast()) {
6697 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6698 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6699 if (!isLittleEndian)
6700 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6701 DAG.getConstant(32, dl, MVT::i32));
6702
6703 // Non-final even elements are skipped; they will be handled
6704 // together with the subsequent argument on the next go-around.
6705 } else
6706 ArgVal = SDValue();
6707
6708 if (ArgVal.getNode())
6709 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6710 } else {
6711 if (IsFastCall)
6712 ComputePtrOff();
6713
6714 // Single-precision floating-point values are mapped to the
6715 // second (rightmost) word of the stack doubleword.
6716 if (Arg.getValueType() == MVT::f32 &&
6717 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6718 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6719 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6720 }
6721
6722 assert(HasParameterArea &&
6723 "Parameter area must exist to pass an argument in memory.");
6724 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6725 true, CFlags.IsTailCall, false, MemOpChains,
6726 TailCallArguments, dl);
6727
6728 NeededLoad = true;
6729 }
6730 // When passing an array of floats, the array occupies consecutive
6731 // space in the argument area; only round up to the next doubleword
6732 // at the end of the array. Otherwise, each float takes 8 bytes.
6733 if (!IsFastCall || NeededLoad) {
6734 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6735 Flags.isInConsecutiveRegs()) ? 4 : 8;
6736 if (Flags.isInConsecutiveRegsLast())
6737 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6738 }
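// E.g., an ELFv2 homogeneous aggregate of three floats advances ArgOffset by
// 4 bytes per element (12 in total) and is only rounded up to the next
// doubleword boundary (16) after its last element.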
6739 break;
6740 }
6741 case MVT::v4f32:
6742 case MVT::v4i32:
6743 case MVT::v8i16:
6744 case MVT::v16i8:
6745 case MVT::v2f64:
6746 case MVT::v2i64:
6747 case MVT::v1i128:
6748 case MVT::f128:
6749 // These can be scalar arguments or elements of a vector array type
6750 // passed directly. The latter are used to implement ELFv2 homogeneous
6751 // vector aggregates.
6752
6753 // For a varargs call, named arguments go into VRs or on the stack as
6754 // usual; unnamed arguments always go to the stack or the corresponding
6755 // GPRs when within range. For now, we always put the value in both
6756 // locations (or even all three).
6757 if (CFlags.IsVarArg) {
6758 assert(HasParameterArea &&
6759 "Parameter area must exist if we have a varargs call.");
6760 // We could elide this store in the case where the object fits
6761 // entirely in R registers. Maybe later.
6762 SDValue Store =
6763 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6764 MemOpChains.push_back(Store);
6765 if (VR_idx != NumVRs) {
6766 SDValue Load =
6767 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6768 MemOpChains.push_back(Load.getValue(1));
6769 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6770 }
6771 ArgOffset += 16;
6772 for (unsigned i=0; i<16; i+=PtrByteSize) {
6773 if (GPR_idx == NumGPRs)
6774 break;
6775 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6776 DAG.getConstant(i, dl, PtrVT));
6777 SDValue Load =
6778 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6779 MemOpChains.push_back(Load.getValue(1));
6780 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6781 }
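// E.g., on 64-bit targets the 16-byte vector stored above is also reloaded
// as two doublewords into the next free GPRs (if any remain), so a vararg
// callee can pick the value up from either location.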
6782 break;
6783 }
6784
6785 // Non-varargs Altivec params go into VRs or on the stack.
6786 if (VR_idx != NumVRs) {
6787 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6788 } else {
6789 if (IsFastCall)
6790 ComputePtrOff();
6791
6792 assert(HasParameterArea &&
6793 "Parameter area must exist to pass an argument in memory.");
6794 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6795 true, CFlags.IsTailCall, true, MemOpChains,
6796 TailCallArguments, dl);
6797 if (IsFastCall)
6798 ArgOffset += 16;
6799 }
6800
6801 if (!IsFastCall)
6802 ArgOffset += 16;
6803 break;
6804 }
6805 }
6806
6807 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6808 "mismatch in size of parameter area");
6809 (void)NumBytesActuallyUsed;
6810
6811 if (!MemOpChains.empty())
6812 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6813
6814 // Check if this is an indirect call (MTCTR/BCTRL).
6815 // See prepareDescriptorIndirectCall and buildCallOperands for more
6816 // information about calls through function pointers in the 64-bit SVR4 ABI.
6817 if (CFlags.IsIndirect) {
6818 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6819 // caller in the TOC save area.
6820 if (isTOCSaveRestoreRequired(Subtarget)) {
6821 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6822 // Load r2 into a virtual register and store it to the TOC save area.
6823 setUsesTOCBasePtr(DAG);
6824 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6825 // TOC save area offset.
6826 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6827 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6828 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6829 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6830 MachinePointerInfo::getStack(
6831 DAG.getMachineFunction(), TOCSaveOffset));
6832 }
6833 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6834 // This does not mean the MTCTR instruction must use R12; it's easier
6835 // to model this as an extra parameter, so do that.
6836 if (isELFv2ABI && !CFlags.IsPatchPoint)
6837 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6838 }
6839
6840 // Build a sequence of copy-to-reg nodes chained together with token chain
6841 // and flag operands which copy the outgoing args into the appropriate regs.
6842 SDValue InGlue;
6843 for (const auto &[Reg, N] : RegsToPass) {
6844 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6845 InGlue = Chain.getValue(1);
6846 }
6847
6848 if (CFlags.IsTailCall && !IsSibCall)
6849 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6850 TailCallArguments);
6851
6852 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6853 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6854}
6855
6856// Returns true when the shadow of a general purpose argument register
6857// in the parameter save area is aligned to at least 'RequiredAlign'.
6858static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6859 assert(RequiredAlign.value() <= 16 &&
6860 "Required alignment greater than stack alignment.");
6861 switch (Reg) {
6862 default:
6863 report_fatal_error("called on invalid register.");
6864 case PPC::R5:
6865 case PPC::R9:
6866 case PPC::X3:
6867 case PPC::X5:
6868 case PPC::X7:
6869 case PPC::X9:
6870 // These registers are 16-byte aligned, which is the strictest alignment
6871 // we can support.
6872 return true;
6873 case PPC::R3:
6874 case PPC::R7:
6875 case PPC::X4:
6876 case PPC::X6:
6877 case PPC::X8:
6878 case PPC::X10:
6879 // The shadow of these registers in the PSA is 8 byte aligned.
6880 return RequiredAlign <= 8;
6881 case PPC::R4:
6882 case PPC::R6:
6883 case PPC::R8:
6884 case PPC::R10:
6885 return RequiredAlign <= 4;
6886 }
6887}
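// Illustrative mapping for 64-bit: X3 shadows parameter save area offset 0
// (16-byte aligned), X4 shadows offset 8 (only 8-byte aligned), X5 offset 16,
// and so on; hence only every other GPR can satisfy a 16-byte requirement.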
6888
6889static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6890 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6891 Type *OrigTy, CCState &State) {
6892 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6893 State.getMachineFunction().getSubtarget());
6894 const bool IsPPC64 = Subtarget.isPPC64();
6895 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6896 const Align PtrAlign(PtrSize);
6897 const Align StackAlign(16);
6898 const MVT RegVT = Subtarget.getScalarIntVT();
6899
6900 if (ValVT == MVT::f128)
6901 report_fatal_error("f128 is unimplemented on AIX.");
6902
6903 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6904 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6905 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6906 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6907 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6908 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6909
6910 static const MCPhysReg VR[] = {// Vector registers.
6911 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6912 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6913 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6914
6915 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6916
6917 if (ArgFlags.isNest()) {
6918 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6919 if (!EnvReg)
6920 report_fatal_error("More than one nest argument.");
6921 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6922 return false;
6923 }
6924
6925 if (ArgFlags.isByVal()) {
6926 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6927 if (ByValAlign > StackAlign)
6928 report_fatal_error("Pass-by-value arguments with alignment greater than "
6929 "16 are not supported.");
6930
6931 const unsigned ByValSize = ArgFlags.getByValSize();
6932 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6933
6934 // An empty aggregate parameter takes up no storage and no registers,
6935 // but needs a MemLoc for a stack slot for the formal arguments side.
6936 if (ByValSize == 0) {
6937 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6938 State.getStackSize(), RegVT, LocInfo));
6939 return false;
6940 }
6941
6942 // Shadow allocate any registers that are not properly aligned.
6943 unsigned NextReg = State.getFirstUnallocated(GPRs);
6944 while (NextReg != GPRs.size() &&
6945 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6946 // Shadow allocate the next register since its alignment is not strict enough.
6947 MCRegister Reg = State.AllocateReg(GPRs);
6948 // Allocate the stack space shadowed by said register.
6949 State.AllocateStack(PtrSize, PtrAlign);
6950 assert(Reg && "Allocating register unexpectedly failed.");
6951 (void)Reg;
6952 NextReg = State.getFirstUnallocated(GPRs);
6953 }
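// For example, a byval requiring 16-byte alignment that arrives when X4
// (whose shadow starts at PSA offset 8) is the next free GPR burns X4 and its
// 8-byte shadow so that the object itself starts on a 16-byte boundary.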
6954
6955 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6956 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6957 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6958 if (MCRegister Reg = State.AllocateReg(GPRs))
6959 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6960 else {
6961 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6962 State.AllocateStack(PtrSize, PtrAlign), RegVT,
6963 LocInfo));
6964 break;
6965 }
6966 }
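// E.g., a 12-byte byval on a 64-bit target allocates a 16-byte stack slot
// (rounded up to a multiple of ObjAlign) and, if GPRs remain, claims two of
// them for the two doublewords it occupies.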
6967 return false;
6968 }
6969
6970 // Arguments always reserve space in the parameter save area.
6971 switch (ValVT.SimpleTy) {
6972 default:
6973 report_fatal_error("Unhandled value type for argument.");
6974 case MVT::i64:
6975 // i64 arguments should have been split to i32 for PPC32.
6976 assert(IsPPC64 && "PPC32 should have split i64 values.");
6977 [[fallthrough]];
6978 case MVT::i1:
6979 case MVT::i32: {
6980 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6981 // AIX integer arguments are always passed in register width.
6982 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6983 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6984 : CCValAssign::LocInfo::ZExt;
6985 if (MCRegister Reg = State.AllocateReg(GPRs))
6986 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6987 else
6988 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6989
6990 return false;
6991 }
6992 case MVT::f32:
6993 case MVT::f64: {
6994 // Parameter save area (PSA) is reserved even if the float is passed in an FPR.
6995 const unsigned StoreSize = LocVT.getStoreSize();
6996 // Floats are always 4-byte aligned in the PSA on AIX.
6997 // This includes f64 in 64-bit mode for ABI compatibility.
6998 const unsigned Offset =
6999 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
7000 MCRegister FReg = State.AllocateReg(FPR);
7001 if (FReg)
7002 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7003
7004 // Reserve and initialize GPRs or initialize the PSA as required.
7005 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
7006 if (MCRegister Reg = State.AllocateReg(GPRs)) {
7007 assert(FReg && "An FPR should be available when a GPR is reserved.");
7008 if (State.isVarArg()) {
7009 // Successfully reserved GPRs are only initialized for vararg calls.
7010 // Custom handling is required for:
7011 // f64 in PPC32 needs to be split into 2 GPRs.
7012 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7013 State.addLoc(
7014 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7015 }
7016 } else {
7017 // If there are insufficient GPRs, the PSA needs to be initialized.
7018 // Initialization occurs even if an FPR was initialized for
7019 // compatibility with the AIX XL compiler. The full memory for the
7020 // argument will be initialized even if a prior word is saved in GPR.
7021 // A custom memLoc is used when the argument also passes in FPR so
7022 // that the callee handling can skip over it easily.
7023 State.addLoc(
7024 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7025 LocInfo)
7026 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7027 break;
7028 }
7029 }
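// E.g., an f64 vararg on PPC32 iterates this loop twice (StoreSize 8,
// PtrSize 4): it either claims two GPRs, recorded above as custom RegLocs,
// or falls back to the PSA once the GPRs run out.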
7030
7031 return false;
7032 }
7033 case MVT::v4f32:
7034 case MVT::v4i32:
7035 case MVT::v8i16:
7036 case MVT::v16i8:
7037 case MVT::v2i64:
7038 case MVT::v2f64:
7039 case MVT::v1i128: {
7040 const unsigned VecSize = 16;
7041 const Align VecAlign(VecSize);
7042
7043 if (!State.isVarArg()) {
7044 // If there are vector registers remaining we don't consume any stack
7045 // space.
7046 if (MCRegister VReg = State.AllocateReg(VR)) {
7047 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7048 return false;
7049 }
7050 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7051 // might be allocated in the portion of the PSA that is shadowed by the
7052 // GPRs.
7053 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7054 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7055 return false;
7056 }
7057
7058 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7059 // Burn any underaligned registers and their shadowed stack space until
7060 // we reach the required alignment.
7061 while (NextRegIndex != GPRs.size() &&
7062 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7063 // Shadow allocate register and its stack shadow.
7064 MCRegister Reg = State.AllocateReg(GPRs);
7065 State.AllocateStack(PtrSize, PtrAlign);
7066 assert(Reg && "Allocating register unexpectedly failed.");
7067 (void)Reg;
7068 NextRegIndex = State.getFirstUnallocated(GPRs);
7069 }
7070
7071 // Vectors that are passed as fixed arguments are handled differently.
7072 // They are passed in VRs if any are available (unlike arguments passed
7073 // through the ellipsis) and shadow GPRs (unlike arguments to non-vararg
7074 // functions).
7075 if (!ArgFlags.isVarArg()) {
7076 if (MCRegister VReg = State.AllocateReg(VR)) {
7077 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7078 // Shadow allocate GPRs and stack space even though we pass in a VR.
7079 for (unsigned I = 0; I != VecSize; I += PtrSize)
7080 State.AllocateReg(GPRs);
7081 State.AllocateStack(VecSize, VecAlign);
7082 return false;
7083 }
7084 // No vector registers remain so pass on the stack.
7085 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7086 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7087 return false;
7088 }
7089
7090 // If all GPRs are consumed then we pass the argument fully on the stack.
7091 if (NextRegIndex == GPRs.size()) {
7092 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7093 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7094 return false;
7095 }
7096
7097 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7098 // half of the argument, and then need to pass the remaining half on the
7099 // stack.
7100 if (GPRs[NextRegIndex] == PPC::R9) {
7101 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7102 State.addLoc(
7103 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7104
7105 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7106 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7107 assert(FirstReg && SecondReg &&
7108 "Allocating R9 or R10 unexpectedly failed.");
7109 State.addLoc(
7110 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7111 State.addLoc(
7112 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7113 return false;
7114 }
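// In this corner case the first 8 bytes of the vector travel in R9/R10 while
// all 16 bytes are also written to the PSA slot described by the custom
// MemLoc above.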
7115
7116 // We have enough GPRs to fully pass the vector argument, and we have
7117 // already consumed any underaligned registers. Start with the custom
7118 // MemLoc and then the custom RegLocs.
7119 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7120 State.addLoc(
7121 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7122 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7123 const MCRegister Reg = State.AllocateReg(GPRs);
7124 assert(Reg && "Failed to allocate register for vararg vector argument");
7125 State.addLoc(
7126 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7127 }
7128 return false;
7129 }
7130 }
7131 return true;
7132}
7133
7134// So far, this function is only used by LowerFormalArguments_AIX()
7135 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7136 bool IsPPC64,
7137 bool HasP8Vector,
7138 bool HasVSX) {
7139 assert((IsPPC64 || SVT != MVT::i64) &&
7140 "i64 should have been split for 32-bit codegen.");
7141
7142 switch (SVT) {
7143 default:
7144 report_fatal_error("Unexpected value type for formal argument");
7145 case MVT::i1:
7146 case MVT::i32:
7147 case MVT::i64:
7148 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7149 case MVT::f32:
7150 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7151 case MVT::f64:
7152 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7153 case MVT::v4f32:
7154 case MVT::v4i32:
7155 case MVT::v8i16:
7156 case MVT::v16i8:
7157 case MVT::v2i64:
7158 case MVT::v2f64:
7159 case MVT::v1i128:
7160 return &PPC::VRRCRegClass;
7161 }
7162}
7163
7164 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7165 SelectionDAG &DAG, SDValue ArgValue,
7166 MVT LocVT, const SDLoc &dl) {
7167 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7168 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7169
7170 if (Flags.isSExt())
7171 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7172 DAG.getValueType(ValVT));
7173 else if (Flags.isZExt())
7174 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7175 DAG.getValueType(ValVT));
7176
7177 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7178}
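// For example, an i8 argument arriving in a 64-bit GPR is tagged with
// AssertSext/AssertZext (i8) on the i64 value and then truncated back to i8,
// so later combines still know how the value was extended.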
7179
7180static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7181 const unsigned LASize = FL->getLinkageSize();
7182
7183 if (PPC::GPRCRegClass.contains(Reg)) {
7184 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7185 "Reg must be a valid argument register!");
7186 return LASize + 4 * (Reg - PPC::R3);
7187 }
7188
7189 if (PPC::G8RCRegClass.contains(Reg)) {
7190 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7191 "Reg must be a valid argument register!");
7192 return LASize + 8 * (Reg - PPC::X3);
7193 }
7194
7195 llvm_unreachable("Only general purpose registers expected.");
7196}
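// Illustrative example: with the 48-byte linkage area used by 64-bit AIX,
// X3 maps to offset 48, X4 to 56, X5 to 64, and so on in 8-byte steps.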
7197
7198// AIX ABI Stack Frame Layout:
7199//
7200// Low Memory +--------------------------------------------+
7201// SP +---> | Back chain | ---+
7202// | +--------------------------------------------+ |
7203// | | Saved Condition Register | |
7204// | +--------------------------------------------+ |
7205// | | Saved Linkage Register | |
7206// | +--------------------------------------------+ | Linkage Area
7207// | | Reserved for compilers | |
7208// | +--------------------------------------------+ |
7209// | | Reserved for binders | |
7210// | +--------------------------------------------+ |
7211// | | Saved TOC pointer | ---+
7212// | +--------------------------------------------+
7213// | | Parameter save area |
7214// | +--------------------------------------------+
7215// | | Alloca space |
7216// | +--------------------------------------------+
7217// | | Local variable space |
7218// | +--------------------------------------------+
7219// | | Float/int conversion temporary |
7220// | +--------------------------------------------+
7221// | | Save area for AltiVec registers |
7222// | +--------------------------------------------+
7223// | | AltiVec alignment padding |
7224// | +--------------------------------------------+
7225// | | Save area for VRSAVE register |
7226// | +--------------------------------------------+
7227// | | Save area for General Purpose registers |
7228// | +--------------------------------------------+
7229// | | Save area for Floating Point registers |
7230// | +--------------------------------------------+
7231// +---- | Back chain |
7232// High Memory +--------------------------------------------+
7233//
7234// Specifications:
7235// AIX 7.2 Assembler Language Reference
7236// Subroutine linkage convention
7237
7238SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7239 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7240 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7241 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7242
7243 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7244 CallConv == CallingConv::Fast) &&
7245 "Unexpected calling convention!");
7246
7247 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7248 report_fatal_error("Tail call support is unimplemented on AIX.");
7249
7250 if (useSoftFloat())
7251 report_fatal_error("Soft float support is unimplemented on AIX.");
7252
7253 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7254
7255 const bool IsPPC64 = Subtarget.isPPC64();
7256 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7257
7258 // Assign locations to all of the incoming arguments.
7259 SmallVector<CCValAssign, 16> ArgLocs;
7260 MachineFunction &MF = DAG.getMachineFunction();
7261 MachineFrameInfo &MFI = MF.getFrameInfo();
7262 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7263 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7264
7265 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7266 // Reserve space for the linkage area on the stack.
7267 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7268 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7269 uint64_t SaveStackPos = CCInfo.getStackSize();
7270 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7271 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7272
7273 SmallVector<SDValue, 8> MemOps;
7274
7275 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7276 CCValAssign &VA = ArgLocs[I++];
7277 MVT LocVT = VA.getLocVT();
7278 MVT ValVT = VA.getValVT();
7279 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7280
7281 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7282 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7283 // For compatibility with the AIX XL compiler, float arguments in the
7284 // parameter save area are initialized even when the argument is available
7285 // in a register. The caller is required to initialize both the register
7286 // and the memory; the callee may choose to read it from either. The
7287 // MemLoc is skipped here because the argument is retrieved from the
7288 // register.
7289 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7290 continue;
7291
7292 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7293 const TargetRegisterClass *RegClass = getRegClassForSVT(
7294 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7295 // On PPC64, the debugger assumes extended 8-byte values are stored from a GPR.
7296 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7297 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7298 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7299 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7300 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7301 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7302 MachinePointerInfo(), Align(PtrByteSize));
7303 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7304 MemOps.push_back(StoreReg);
7305 }
7306
7307 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7308 unsigned StoreSize =
7309 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7310 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7311 }
7312
7313 auto HandleMemLoc = [&]() {
7314 const unsigned LocSize = LocVT.getStoreSize();
7315 const unsigned ValSize = ValVT.getStoreSize();
7316 assert((ValSize <= LocSize) &&
7317 "Object size is larger than size of MemLoc");
7318 int CurArgOffset = VA.getLocMemOffset();
7319 // Objects are right-justified because AIX is big-endian.
7320 if (LocSize > ValSize)
7321 CurArgOffset += LocSize - ValSize;
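// E.g., an i32 passed in an 8-byte slot on 64-bit AIX is loaded from
// LocMemOffset + 4, the right-justified word of the big-endian slot.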
7322 // Potential tail calls could cause overwriting of argument stack slots.
7323 const bool IsImmutable =
7324 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7325 (CallConv == CallingConv::Fast));
7326 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7327 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7328 SDValue ArgValue =
7329 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7330
7331 // While the ABI specifies the argument type is (sign or zero) extended
7332 // out to register width, not all code is compliant. We truncate and
7333 // re-extend to be more forgiving of these callers when the argument type
7334 // is smaller than register width.
7335 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7336 ValVT.isInteger() &&
7337 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7338 // It is possible to have either real integer values
7339 // or integers that were not originally integers.
7340 // In the latter case, these could have come from structs,
7341 // and these integers would not have an extend on the parameter.
7342 // Since these types of integers do not have an extend specified
7343 // in the first place, the type of extend that we do should not matter.
7344 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7345 ? MVT::i8
7346 : ArgVT;
7347 SDValue ArgValueTrunc =
7348 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7349 SDValue ArgValueExt =
7350 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7351 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7352 InVals.push_back(ArgValueExt);
7353 } else {
7354 InVals.push_back(ArgValue);
7355 }
7356 };
7357
7358 // Vector arguments to VaArg functions are passed both on the stack, and
7359 // in any available GPRs. Load the value from the stack and add the GPRs
7360 // as live ins.
7361 if (VA.isMemLoc() && VA.needsCustom()) {
7362 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7363 assert(isVarArg && "Only use custom memloc for vararg.");
7364 // Remember the ValNo of the custom MemLoc so we can compare it to the
7365 // ValNo of the matching custom RegLocs.
7366 const unsigned OriginalValNo = VA.getValNo();
7367 (void)OriginalValNo;
7368
7369 auto HandleCustomVecRegLoc = [&]() {
7370 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7371 "Missing custom RegLoc.");
7372 VA = ArgLocs[I++];
7373 assert(VA.getValVT().isVector() &&
7374 "Unexpected Val type for custom RegLoc.");
7375 assert(VA.getValNo() == OriginalValNo &&
7376 "ValNo mismatch between custom MemLoc and RegLoc.");
7377 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7378 MF.addLiveIn(VA.getLocReg(),
7379 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7380 Subtarget.hasVSX()));
7381 };
7382
7383 HandleMemLoc();
7384 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7385 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7386 // R10.
7387 HandleCustomVecRegLoc();
7388 HandleCustomVecRegLoc();
7389
7390 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7391 // we passed the vector in R5, R6, R7 and R8.
7392 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7393 assert(!IsPPC64 &&
7394 "Only 2 custom RegLocs expected for 64-bit codegen.");
7395 HandleCustomVecRegLoc();
7396 HandleCustomVecRegLoc();
7397 }
7398
7399 continue;
7400 }
7401
7402 if (VA.isRegLoc()) {
7403 if (VA.getValVT().isScalarInteger())
7404 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7405 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7406 switch (VA.getValVT().SimpleTy) {
7407 default:
7408 report_fatal_error("Unhandled value type for argument.");
7409 case MVT::f32:
7410 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7411 break;
7412 case MVT::f64:
7413 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7414 break;
7415 }
7416 } else if (VA.getValVT().isVector()) {
7417 switch (VA.getValVT().SimpleTy) {
7418 default:
7419 report_fatal_error("Unhandled value type for argument.");
7420 case MVT::v16i8:
7421 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7422 break;
7423 case MVT::v8i16:
7424 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7425 break;
7426 case MVT::v4i32:
7427 case MVT::v2i64:
7428 case MVT::v1i128:
7429 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7430 break;
7431 case MVT::v4f32:
7432 case MVT::v2f64:
7433 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7434 break;
7435 }
7436 }
7437 }
7438
7439 if (Flags.isByVal() && VA.isMemLoc()) {
7440 const unsigned Size =
7441 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7442 PtrByteSize);
7443 const int FI = MF.getFrameInfo().CreateFixedObject(
7444 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7445 /* IsAliased */ true);
7446 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7447 InVals.push_back(FIN);
7448
7449 continue;
7450 }
7451
7452 if (Flags.isByVal()) {
7453 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7454
7455 const MCPhysReg ArgReg = VA.getLocReg();
7456 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7457
7458 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7459 const int FI = MF.getFrameInfo().CreateFixedObject(
7460 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7461 /* IsAliased */ true);
7462 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7463 InVals.push_back(FIN);
7464
7465 // Add live ins for all the RegLocs for the same ByVal.
7466 const TargetRegisterClass *RegClass =
7467 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7468
7469 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7470 unsigned Offset) {
7471 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7472 // Since the caller side has left-justified the aggregate in the
7473 // register, we can simply store the entire register into the stack
7474 // slot.
7475 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7476 // The store to the fixed-stack object is needed because accessing a
7477 // field of the ByVal will use a GEP and load. Ideally we would extract
7478 // the value from the register directly and elide the stores when the
7479 // argument's address is not taken, but that will need to be future
7480 // work.
7481 SDValue Store = DAG.getStore(
7482 CopyFrom.getValue(1), dl, CopyFrom,
7483 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7484 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7485
7486 MemOps.push_back(Store);
7487 };
7488
7489 unsigned Offset = 0;
7490 HandleRegLoc(VA.getLocReg(), Offset);
7491 Offset += PtrByteSize;
7492 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7493 Offset += PtrByteSize) {
7494 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7495 "RegLocs should be for ByVal argument.");
7496
7497 const CCValAssign RL = ArgLocs[I++];
7498 HandleRegLoc(RL.getLocReg(), Offset);
7500 }
7501
7502 if (Offset != StackSize) {
7503 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7504 "Expected MemLoc for remaining bytes.");
7505 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7506 // Consume the MemLoc. The InVal has already been emitted, so nothing
7507 // more needs to be done.
7508 ++I;
7509 }
7510
7511 continue;
7512 }
7513
7514 if (VA.isRegLoc() && !VA.needsCustom()) {
7515 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7516 Register VReg =
7517 MF.addLiveIn(VA.getLocReg(),
7518 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7519 Subtarget.hasVSX()));
7520 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7521 if (ValVT.isScalarInteger() &&
7522 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7523 ArgValue =
7524 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7525 }
7526 InVals.push_back(ArgValue);
7527 continue;
7528 }
7529 if (VA.isMemLoc()) {
7530 HandleMemLoc();
7531 continue;
7532 }
7533 }
7534
7535 // On AIX a minimum of 8 words is saved to the parameter save area.
7536 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7537 // Area that is at least reserved in the caller of this function.
7538 unsigned CallerReservedArea = std::max<unsigned>(
7539 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7540
7541 // Set the size that is at least reserved in caller of this function. Tail
7542 // call optimized function's reserved stack space needs to be aligned so
7543 // that taking the difference between two stack areas will result in an
7544 // aligned stack.
7545 CallerReservedArea =
7546 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7547 FuncInfo->setMinReservedArea(CallerReservedArea);
7548
7549 if (isVarArg) {
7550 FuncInfo->setVarArgsFrameIndex(
7551 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7552 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7553
7554 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7555 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7556
7557 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7558 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7559 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7560
7561 // The fixed integer arguments of a variadic function are stored to the
7562 // VarArgsFrameIndex on the stack so that they may be loaded by
7563 // dereferencing the result of va_next.
7564 for (unsigned GPRIndex =
7565 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7566 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7567
7568 const Register VReg =
7569 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7570 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7571
7572 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7573 SDValue Store =
7574 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7575 MemOps.push_back(Store);
7576 // Increment the address for the next argument to store.
7577 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7578 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7579 }
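// E.g., a vararg routine whose fixed arguments consumed the first two
// argument GPRs starts this loop at GPRIndex 2 and spills the remaining six
// registers to consecutive slots so va_arg can walk them in memory.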
7580 }
7581
7582 if (!MemOps.empty())
7583 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7584
7585 return Chain;
7586}
7587
7588SDValue PPCTargetLowering::LowerCall_AIX(
7589 SDValue Chain, SDValue Callee, CallFlags CFlags,
7590 const SmallVectorImpl<ISD::OutputArg> &Outs,
7591 const SmallVectorImpl<SDValue> &OutVals,
7592 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7593 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7594 const CallBase *CB) const {
7595 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7596 // AIX ABI stack frame layout.
7597
7598 assert((CFlags.CallConv == CallingConv::C ||
7599 CFlags.CallConv == CallingConv::Cold ||
7600 CFlags.CallConv == CallingConv::Fast) &&
7601 "Unexpected calling convention!");
7602
7603 if (CFlags.IsPatchPoint)
7604 report_fatal_error("This call type is unimplemented on AIX.");
7605
7606 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7607
7608 MachineFunction &MF = DAG.getMachineFunction();
7609 SmallVector<CCValAssign, 16> ArgLocs;
7610 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7611 *DAG.getContext());
7612
7613 // Reserve space for the linkage save area (LSA) on the stack.
7614 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7615 // [SP][CR][LR][2 x reserved][TOC].
7616 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7617 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7618 const bool IsPPC64 = Subtarget.isPPC64();
7619 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7620 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7621 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7622 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7623
7624 // The prolog code of the callee may store up to 8 GPR argument registers to
7625 // the stack, allowing va_start to index over them in memory if the callee
7626 // is variadic.
7627 // Because we cannot tell if this is needed on the caller side, we have to
7628 // conservatively assume that it is needed. As such, make sure we have at
7629 // least enough stack space for the caller to store the 8 GPRs.
7630 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7631 const unsigned NumBytes = std::max<unsigned>(
7632 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7633
7634 // Adjust the stack pointer for the new arguments...
7635 // These operations are automatically eliminated by the prolog/epilog pass.
7636 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7637 SDValue CallSeqStart = Chain;
7638
7640 SmallVector<SDValue, 8> MemOpChains;
7641
7642 // Set up a copy of the stack pointer for loading and storing any
7643 // arguments that may not fit in the registers available for argument
7644 // passing.
7645 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7646 : DAG.getRegister(PPC::R1, MVT::i32);
7647
7648 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7649 const unsigned ValNo = ArgLocs[I].getValNo();
7650 SDValue Arg = OutVals[ValNo];
7651 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7652
7653 if (Flags.isByVal()) {
7654 const unsigned ByValSize = Flags.getByValSize();
7655
7656 // Nothing to do for zero-sized ByVals on the caller side.
7657 if (!ByValSize) {
7658 ++I;
7659 continue;
7660 }
7661
7662 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7663 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7664 (LoadOffset != 0)
7665 ? DAG.getObjectPtrOffset(
7666 dl, Arg, TypeSize::getFixed(LoadOffset))
7667 : Arg,
7668 MachinePointerInfo(), VT);
7669 };
7670
7671 unsigned LoadOffset = 0;
7672
7673 // Initialize the registers that are fully occupied by the by-val argument.
7674 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7675 SDValue Load = GetLoad(PtrVT, LoadOffset);
7676 MemOpChains.push_back(Load.getValue(1));
7677 LoadOffset += PtrByteSize;
7678 const CCValAssign &ByValVA = ArgLocs[I++];
7679 assert(ByValVA.getValNo() == ValNo &&
7680 "Unexpected location for pass-by-value argument.");
7681 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7682 }
7683
7684 if (LoadOffset == ByValSize)
7685 continue;
7686
7687 // There must be one more loc to handle the remainder.
7688 assert(ArgLocs[I].getValNo() == ValNo &&
7689 "Expected additional location for by-value argument.");
7690
7691 if (ArgLocs[I].isMemLoc()) {
7692 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7693 const CCValAssign &ByValVA = ArgLocs[I++];
7694 ISD::ArgFlagsTy MemcpyFlags = Flags;
7695 // Only memcpy the bytes that aren't passed in registers.
7696 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7697 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7698 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7699 dl, Arg, TypeSize::getFixed(LoadOffset))
7700 : Arg,
7701 DAG.getObjectPtrOffset(
7702 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7703 CallSeqStart, MemcpyFlags, DAG, dl);
7704 continue;
7705 }
7706
7707 // Initialize the final register residue.
7708 // Any residue that occupies the final by-val arg register must be
7709 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7710 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7711 // 2 and 1 byte loads.
7712 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7713 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7714 "Unexpected register residue for by-value argument.");
7715 SDValue ResidueVal;
7716 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7717 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7718 const MVT VT =
7719 N == 1 ? MVT::i8
7720 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7721 SDValue Load = GetLoad(VT, LoadOffset);
7722 MemOpChains.push_back(Load.getValue(1));
7723 LoadOffset += N;
7724 Bytes += N;
7725
7726 // By-val arguments are passed left-justified in a register.
7727 // Every load here needs to be shifted, otherwise a full register load
7728 // should have been used.
7729 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7730 "Unexpected load emitted during handling of pass-by-value "
7731 "argument.");
7732 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7733 EVT ShiftAmountTy =
7734 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7735 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7736 SDValue ShiftedLoad =
7737 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7738 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7739 ShiftedLoad)
7740 : ShiftedLoad;
7741 }
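// E.g., a 7-byte residue on a 64-bit target is loaded as 4 + 2 + 1 bytes;
// the pieces are shifted left by 32, 16 and 8 bits respectively and OR'd
// together to form the left-justified register value.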
7742
7743 const CCValAssign &ByValVA = ArgLocs[I++];
7744 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7745 continue;
7746 }
7747
7748 CCValAssign &VA = ArgLocs[I++];
7749 const MVT LocVT = VA.getLocVT();
7750 const MVT ValVT = VA.getValVT();
7751
7752 switch (VA.getLocInfo()) {
7753 default:
7754 report_fatal_error("Unexpected argument extension type.");
7755 case CCValAssign::Full:
7756 break;
7757 case CCValAssign::ZExt:
7758 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7759 break;
7760 case CCValAssign::SExt:
7761 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7762 break;
7763 }
7764
7765 if (VA.isRegLoc() && !VA.needsCustom()) {
7766 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7767 continue;
7768 }
7769
7770 // Vector arguments passed to VarArg functions need custom handling when
7771 // they are passed (at least partially) in GPRs.
7772 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7773 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7774 // Store value to its stack slot.
7775 SDValue PtrOff =
7776 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7777 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7778 SDValue Store =
7779 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7780 MemOpChains.push_back(Store);
7781 const unsigned OriginalValNo = VA.getValNo();
7782 // Then load the GPRs from the stack
7783 unsigned LoadOffset = 0;
7784 auto HandleCustomVecRegLoc = [&]() {
7785 assert(I != E && "Unexpected end of CCvalAssigns.");
7786 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7787 "Expected custom RegLoc.");
7788 CCValAssign RegVA = ArgLocs[I++];
7789 assert(RegVA.getValNo() == OriginalValNo &&
7790 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7791 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7792 DAG.getConstant(LoadOffset, dl, PtrVT));
7793 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7794 MemOpChains.push_back(Load.getValue(1));
7795 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7796 LoadOffset += PtrByteSize;
7797 };
7798
7799 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7800 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7801 // R10.
7802 HandleCustomVecRegLoc();
7803 HandleCustomVecRegLoc();
7804
7805 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7806 ArgLocs[I].getValNo() == OriginalValNo) {
7807 assert(!IsPPC64 &&
7808 "Only 2 custom RegLocs expected for 64-bit codegen.");
7809 HandleCustomVecRegLoc();
7810 HandleCustomVecRegLoc();
7811 }
7812
7813 continue;
7814 }
7815
7816 if (VA.isMemLoc()) {
7817 SDValue PtrOff =
7818 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7819 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7820 MemOpChains.push_back(
7821 DAG.getStore(Chain, dl, Arg, PtrOff,
7822 MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
7823 Subtarget.getFrameLowering()->getStackAlign()));
7824
7825 continue;
7826 }
7827
7828 if (!ValVT.isFloatingPoint())
7830 "Unexpected register handling for calling convention.");
7831
7832 // Custom handling is used for GPR initializations for vararg float
7833 // arguments.
7834 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7835 LocVT.isInteger() &&
7836 "Custom register handling only expected for VarArg.");
7837
7838 SDValue ArgAsInt =
7839 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7840
7841 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7842 // f32 in 32-bit GPR
7843 // f64 in 64-bit GPR
7844 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7845 else if (Arg.getValueType().getFixedSizeInBits() <
7846 LocVT.getFixedSizeInBits())
7847 // f32 in 64-bit GPR.
7848 RegsToPass.push_back(std::make_pair(
7849 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7850 else {
7851 // f64 in two 32-bit GPRs
7852 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7853 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7854 "Unexpected custom register for argument!");
7855 CCValAssign &GPR1 = VA;
7856 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7857 DAG.getConstant(32, dl, MVT::i8));
7858 RegsToPass.push_back(std::make_pair(
7859 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7860
7861 if (I != E) {
7862 // If only 1 GPR was available, there will only be one custom GPR and
7863 // the argument will also pass in memory.
7864 CCValAssign &PeekArg = ArgLocs[I];
7865 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7866 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7867 CCValAssign &GPR2 = ArgLocs[I++];
7868 RegsToPass.push_back(std::make_pair(
7869 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7870 }
7871 }
7872 }
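// Here the f64 bit pattern is split across two 32-bit GPRs: the high word
// (bits 63..32) goes into the first custom register and the truncated low
// word into the second, when a second GPR is available.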
7873 }
7874
7875 if (!MemOpChains.empty())
7876 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7877
7878 // For indirect calls, we need to save the TOC base to the stack for
7879 // restoration after the call.
7880 if (CFlags.IsIndirect) {
7881 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7882 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7883 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7884 const MVT PtrVT = Subtarget.getScalarIntVT();
7885 const unsigned TOCSaveOffset =
7886 Subtarget.getFrameLowering()->getTOCSaveOffset();
7887
7888 setUsesTOCBasePtr(DAG);
7889 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7890 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7891 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7892 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7893 Chain = DAG.getStore(
7894 Val.getValue(1), dl, Val, AddPtr,
7895 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7896 }
7897
7898 // Build a sequence of copy-to-reg nodes chained together with token chain
7899 // and flag operands which copy the outgoing args into the appropriate regs.
7900 SDValue InGlue;
7901 for (auto Reg : RegsToPass) {
7902 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7903 InGlue = Chain.getValue(1);
7904 }
7905
7906 const int SPDiff = 0;
7907 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7908 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7909}
7910
7911bool
7912PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7913 MachineFunction &MF, bool isVarArg,
7914 const SmallVectorImpl<ISD::OutputArg> &Outs,
7915 LLVMContext &Context,
7916 const Type *RetTy) const {
7917 SmallVector<CCValAssign, 16> RVLocs;
7918 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7919 return CCInfo.CheckReturn(
7920 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7921 ? RetCC_PPC_Cold
7922 : RetCC_PPC);
7923}
7924
7925SDValue
7926PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7927 bool isVarArg,
7928 const SmallVectorImpl<ISD::OutputArg> &Outs,
7929 const SmallVectorImpl<SDValue> &OutVals,
7930 const SDLoc &dl, SelectionDAG &DAG) const {
7931 SmallVector<CCValAssign, 16> RVLocs;
7932 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7933 *DAG.getContext());
7934 CCInfo.AnalyzeReturn(Outs,
7935 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7936 ? RetCC_PPC_Cold
7937 : RetCC_PPC);
7938
7939 SDValue Glue;
7940 SmallVector<SDValue, 4> RetOps(1, Chain);
7941
7942 // Copy the result values into the output registers.
7943 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7944 CCValAssign &VA = RVLocs[i];
7945 assert(VA.isRegLoc() && "Can only return in registers!");
7946
7947 SDValue Arg = OutVals[RealResIdx];
7948
7949 switch (VA.getLocInfo()) {
7950 default: llvm_unreachable("Unknown loc info!");
7951 case CCValAssign::Full: break;
7952 case CCValAssign::AExt:
7953 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7954 break;
7955 case CCValAssign::ZExt:
7956 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7957 break;
7958 case CCValAssign::SExt:
7959 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7960 break;
7961 }
7962 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7963 bool isLittleEndian = Subtarget.isLittleEndian();
7964 // Legalize ret f64 -> ret 2 x i32.
7965 SDValue SVal =
7966 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7967 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7968 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7969 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7970 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7971 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7972 Glue = Chain.getValue(1);
7973 VA = RVLocs[++i]; // skip ahead to next loc
7974 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7975 } else
7976 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7977 Glue = Chain.getValue(1);
7978 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7979 }
7980
7981 RetOps[0] = Chain; // Update chain.
7982
7983 // Add the glue if we have it.
7984 if (Glue.getNode())
7985 RetOps.push_back(Glue);
7986
7987 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7988}
7989
7990SDValue
7991PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7992 SelectionDAG &DAG) const {
7993 SDLoc dl(Op);
7994
7995 // Get the correct type for integers.
7996 EVT IntVT = Op.getValueType();
7997
7998 // Get the inputs.
7999 SDValue Chain = Op.getOperand(0);
8000 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8001 // Build a DYNAREAOFFSET node.
8002 SDValue Ops[2] = {Chain, FPSIdx};
8003 SDVTList VTs = DAG.getVTList(IntVT);
8004 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
8005}
8006
8007SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
8008 SelectionDAG &DAG) const {
8009 // When we pop the dynamic allocation we need to restore the SP link.
8010 SDLoc dl(Op);
8011
8012 // Get the correct type for pointers.
8013 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8014
8015 // Construct the stack pointer operand.
8016 bool isPPC64 = Subtarget.isPPC64();
8017 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
8018 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
8019
8020 // Get the operands for the STACKRESTORE.
8021 SDValue Chain = Op.getOperand(0);
8022 SDValue SaveSP = Op.getOperand(1);
8023
8024 // Load the old link SP.
8025 SDValue LoadLinkSP =
8026 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
8027
8028 // Restore the stack pointer.
8029 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
8030
8031 // Store the old link SP.
8032 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
8033}
8034
8035SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
8036 MachineFunction &MF = DAG.getMachineFunction();
8037 bool isPPC64 = Subtarget.isPPC64();
8038 EVT PtrVT = getPointerTy(MF.getDataLayout());
8039
8040 // Get the current return address save index.
8042 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8043 int RASI = FI->getReturnAddrSaveIndex();
8044
8045 // If the return address save index hasn't been defined yet.
8046 if (!RASI) {
8047 // Find out the fixed offset of the return address save area.
8048 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8049 // Allocate the frame index for the return address save area.
8050 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, LROffset, false);
8051 // Save the result.
8052 FI->setReturnAddrSaveIndex(RASI);
8053 }
8054 return DAG.getFrameIndex(RASI, PtrVT);
8055}
8056
8057SDValue
8058PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8059 MachineFunction &MF = DAG.getMachineFunction();
8060 bool isPPC64 = Subtarget.isPPC64();
8061 EVT PtrVT = getPointerTy(MF.getDataLayout());
8062
8063 // Get current frame pointer save index. The users of this index will be
8064 // primarily DYNALLOC instructions.
8065 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8066 int FPSI = FI->getFramePointerSaveIndex();
8067
8068 // If the frame pointer save index hasn't been defined yet.
8069 if (!FPSI) {
8070 // Find out the fixed offset of the frame pointer save area.
8071 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8072 // Allocate the frame index for the frame pointer save area.
8073 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8074 // Save the result.
8075 FI->setFramePointerSaveIndex(FPSI);
8076 }
8077 return DAG.getFrameIndex(FPSI, PtrVT);
8078}
8079
8080SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8081 SelectionDAG &DAG) const {
8082 MachineFunction &MF = DAG.getMachineFunction();
8083 // Get the inputs.
8084 SDValue Chain = Op.getOperand(0);
8085 SDValue Size = Op.getOperand(1);
8086 SDLoc dl(Op);
8087
8088 // Get the correct type for pointers.
8089 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8090 // Negate the size.
8091 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8092 DAG.getConstant(0, dl, PtrVT), Size);
8093 // Construct a node for the frame pointer save index.
8094 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8095 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8096 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8097 if (hasInlineStackProbe(MF))
8098 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8099 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8100}
8101
8102SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8103 SelectionDAG &DAG) const {
8104  MachineFunction &MF = DAG.getMachineFunction();
8105
8106 bool isPPC64 = Subtarget.isPPC64();
8107 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8108
8109 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8110 return DAG.getFrameIndex(FI, PtrVT);
8111}
8112
8113SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8114 SelectionDAG &DAG) const {
8115 SDLoc DL(Op);
8116 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8117 DAG.getVTList(MVT::i32, MVT::Other),
8118 Op.getOperand(0), Op.getOperand(1));
8119}
8120
8121SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8122 SelectionDAG &DAG) const {
8123 SDLoc DL(Op);
8124 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8125 Op.getOperand(0), Op.getOperand(1));
8126}
8127
8128SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8129 if (Op.getValueType().isVector())
8130 return LowerVectorLoad(Op, DAG);
8131
8132 assert(Op.getValueType() == MVT::i1 &&
8133 "Custom lowering only for i1 loads");
8134
8135 // First, load 8 bits into 32 bits, then truncate to 1 bit.
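  // For illustration, the DAG built below is roughly (a sketch, not verbatim
  // node output):
  //   NewLD: iPTR,ch = extload<(load 1)> Chain, BasePtr
  //   Result: i1 = truncate NewLD
  // with NewLD's chain result forwarded as the second merged value.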
8136
8137 SDLoc dl(Op);
8138 LoadSDNode *LD = cast<LoadSDNode>(Op);
8139
8140 SDValue Chain = LD->getChain();
8141 SDValue BasePtr = LD->getBasePtr();
8142 MachineMemOperand *MMO = LD->getMemOperand();
8143
8144 SDValue NewLD =
8145 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8146 BasePtr, MVT::i8, MMO);
8147 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8148
8149 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8150 return DAG.getMergeValues(Ops, dl);
8151}
8152
8153SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8154 if (Op.getOperand(1).getValueType().isVector())
8155 return LowerVectorStore(Op, DAG);
8156
8157 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8158 "Custom lowering only for i1 stores");
8159
8160 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8161
8162 SDLoc dl(Op);
8163 StoreSDNode *ST = cast<StoreSDNode>(Op);
8164
8165 SDValue Chain = ST->getChain();
8166 SDValue BasePtr = ST->getBasePtr();
8167 SDValue Value = ST->getValue();
8168 MachineMemOperand *MMO = ST->getMemOperand();
8169
8170  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8171                      Value);
8172 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8173}
8174
8175// FIXME: Remove this once the ANDI glue bug is fixed:
8176SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8177 assert(Op.getValueType() == MVT::i1 &&
8178 "Custom lowering only for i1 results");
8179
8180 SDLoc DL(Op);
8181 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8182}
8183
8184SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8185 SelectionDAG &DAG) const {
8186
8187 // Implements a vector truncate that fits in a vector register as a shuffle.
8188 // We want to legalize vector truncates down to where the source fits in
8189  // a vector register (and the target type is therefore smaller than the
8190  // vector register size). At that point legalization will try to custom lower the sub-legal
8191 // result and get here - where we can contain the truncate as a single target
8192 // operation.
8193
8194 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8195 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8196 //
8197  // We will implement it for big-endian ordering as this (where u denotes
8198  // an undefined element):
8199 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8200 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8201 //
8202 // The same operation in little-endian ordering will be:
8203 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8204 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8205
8206 EVT TrgVT = Op.getValueType();
8207 assert(TrgVT.isVector() && "Vector type expected.");
8208 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8209 EVT EltVT = TrgVT.getVectorElementType();
8210 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8211 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8212 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8213 return SDValue();
8214
8215 SDValue N1 = Op.getOperand(0);
8216 EVT SrcVT = N1.getValueType();
8217 unsigned SrcSize = SrcVT.getSizeInBits();
8218 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8219      !llvm::has_single_bit<uint32_t>(
8220          SrcVT.getVectorElementType().getSizeInBits()))
8221    return SDValue();
8222 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8223 return SDValue();
8224
8225 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8226 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8227
8228 SDLoc DL(Op);
8229 SDValue Op1, Op2;
8230 if (SrcSize == 256) {
8231 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8232 EVT SplitVT =
8233        EVT::getVectorVT(*DAG.getContext(), EltVT, SrcVT.getVectorNumElements() / 2);
8234    unsigned SplitNumElts = SplitVT.getVectorNumElements();
8235 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8236 DAG.getConstant(0, DL, VecIdxTy));
8237 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8238 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8239 }
8240 else {
8241 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8242 Op2 = DAG.getUNDEF(WideVT);
8243 }
8244
8245 // First list the elements we want to keep.
8246 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8247 SmallVector<int, 16> ShuffV;
8248 if (Subtarget.isLittleEndian())
8249 for (unsigned i = 0; i < TrgNumElts; ++i)
8250 ShuffV.push_back(i * SizeMult);
8251 else
8252 for (unsigned i = 1; i <= TrgNumElts; ++i)
8253 ShuffV.push_back(i * SizeMult - 1);
8254
8255 // Populate the remaining elements with undefs.
8256 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8257 // ShuffV.push_back(i + WideNumElts);
8258 ShuffV.push_back(WideNumElts + 1);
8259
8260 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8261 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8262 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8263}
8264
8265/// LowerSELECT_CC - Lower floating point select_cc's into an fsel instruction when
8266/// possible.
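/// For reference, the PowerPC fsel instruction computes
///   fsel FRT,FRA,FRC,FRB :  FRT = (FRA >= 0.0) ? FRC : FRB
/// so "select_cc a, +0.0, t, f, SETGE" maps directly onto "fsel a, t, f"; the
/// cases below rewrite the other orderings into that canonical form.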
8267SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8268 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8269 EVT ResVT = Op.getValueType();
8270 EVT CmpVT = Op.getOperand(0).getValueType();
8271 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8272 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8273 SDLoc dl(Op);
8274
8275  // Without power9-vector, we don't have a native instruction for f128 comparison.
8276  // The following transformation to a libcall is needed for setcc:
8277 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8278 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8279 SDValue Z = DAG.getSetCC(
8280 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8281 LHS, RHS, CC);
8282 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8283 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8284 }
8285
8286 // Not FP, or using SPE? Not a fsel.
8287 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8288 Subtarget.hasSPE())
8289 return Op;
8290
8291 SDNodeFlags Flags = Op.getNode()->getFlags();
8292
8293 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8294 // presence of infinities.
8295 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8296 switch (CC) {
8297 default:
8298 break;
8299 case ISD::SETOGT:
8300 case ISD::SETGT:
8301 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8302 case ISD::SETOLT:
8303 case ISD::SETLT:
8304 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8305 }
8306 }
8307
8308 // We might be able to do better than this under some circumstances, but in
8309 // general, fsel-based lowering of select is a finite-math-only optimization.
8310 // For more information, see section F.3 of the 2.06 ISA specification.
8311 // With ISA 3.0
8312 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8313 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8314 ResVT == MVT::f128)
8315 return Op;
8316
8317 // If the RHS of the comparison is a 0.0, we don't need to do the
8318 // subtraction at all.
8319 SDValue Sel1;
8320 if (isFloatingPointZero(RHS))
8321 switch (CC) {
8322 default: break; // SETUO etc aren't handled by fsel.
8323 case ISD::SETNE:
8324 std::swap(TV, FV);
8325 [[fallthrough]];
8326 case ISD::SETEQ:
8327 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8328 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8329 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8330 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8331 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8332 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8333 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8334 case ISD::SETULT:
8335 case ISD::SETLT:
8336 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8337 [[fallthrough]];
8338 case ISD::SETOGE:
8339 case ISD::SETGE:
8340 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8341 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8342 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8343 case ISD::SETUGT:
8344 case ISD::SETGT:
8345      std::swap(TV, FV); // fsel is natively setge, swap operands to handle setgt via setle
8346 [[fallthrough]];
8347 case ISD::SETOLE:
8348 case ISD::SETLE:
8349 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8350 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8351 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8352 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8353 }
8354
8355 SDValue Cmp;
8356 switch (CC) {
8357 default: break; // SETUO etc aren't handled by fsel.
8358 case ISD::SETNE:
8359 std::swap(TV, FV);
8360 [[fallthrough]];
8361 case ISD::SETEQ:
8362 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8363 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8364 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8365 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8366 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8367 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8368 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8369 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8370 case ISD::SETULT:
8371 case ISD::SETLT:
8372 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8373 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8374 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8375 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8376 case ISD::SETOGE:
8377 case ISD::SETGE:
8378 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8379 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8380 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8381 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8382 case ISD::SETUGT:
8383 case ISD::SETGT:
8384 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8385 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8386 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8387 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8388 case ISD::SETOLE:
8389 case ISD::SETLE:
8390 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8391 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8392 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8393 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8394 }
8395 return Op;
8396}
8397
8398static unsigned getPPCStrictOpcode(unsigned Opc) {
8399 switch (Opc) {
8400 default:
8401 llvm_unreachable("No strict version of this opcode!");
8402 case PPCISD::FCTIDZ:
8403 return PPCISD::STRICT_FCTIDZ;
8404 case PPCISD::FCTIWZ:
8405 return PPCISD::STRICT_FCTIWZ;
8406 case PPCISD::FCTIDUZ:
8407    return PPCISD::STRICT_FCTIDUZ;
8408  case PPCISD::FCTIWUZ:
8409    return PPCISD::STRICT_FCTIWUZ;
8410 case PPCISD::FCFID:
8411 return PPCISD::STRICT_FCFID;
8412 case PPCISD::FCFIDU:
8413 return PPCISD::STRICT_FCFIDU;
8414 case PPCISD::FCFIDS:
8415 return PPCISD::STRICT_FCFIDS;
8416 case PPCISD::FCFIDUS:
8417    return PPCISD::STRICT_FCFIDUS;
8418  }
8419}
8420
8421static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8422                              const PPCSubtarget &Subtarget) {
8423 SDLoc dl(Op);
8424 bool IsStrict = Op->isStrictFPOpcode();
8425 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8426 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8427
8428 // TODO: Any other flags to propagate?
8429 SDNodeFlags Flags;
8430 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8431
8432 // For strict nodes, source is the second operand.
8433 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8434 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8435 MVT DestTy = Op.getSimpleValueType();
8436 assert(Src.getValueType().isFloatingPoint() &&
8437 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8438 DestTy == MVT::i64) &&
8439 "Invalid FP_TO_INT types");
8440 if (Src.getValueType() == MVT::f32) {
8441 if (IsStrict) {
8442 Src =
8443          DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8444                      DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8445 Chain = Src.getValue(1);
8446 } else
8447 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8448 }
8449 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8450 DestTy = Subtarget.getScalarIntVT();
8451 unsigned Opc = ISD::DELETED_NODE;
8452 switch (DestTy.SimpleTy) {
8453 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8454 case MVT::i32:
8455 Opc = IsSigned ? PPCISD::FCTIWZ
8456 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8457 break;
8458 case MVT::i64:
8459 assert((IsSigned || Subtarget.hasFPCVT()) &&
8460 "i64 FP_TO_UINT is supported only with FPCVT");
8461 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8462 }
8463 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8464 SDValue Conv;
8465 if (IsStrict) {
8466    Opc = getPPCStrictOpcode(Opc);
8467    Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8468 Flags);
8469 } else {
8470 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8471 }
8472 return Conv;
8473}
8474
8475void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8476 SelectionDAG &DAG,
8477 const SDLoc &dl) const {
8478 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8479 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8480 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8481 bool IsStrict = Op->isStrictFPOpcode();
8482
8483 // Convert the FP value to an int value through memory.
8484 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8485 (IsSigned || Subtarget.hasFPCVT());
8486 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8487 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8488 MachinePointerInfo MPI =
8489      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8490
8491 // Emit a store to the stack slot.
8492 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8493 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8494 if (i32Stack) {
8495    MachineFunction &MF = DAG.getMachineFunction();
8496    Alignment = Align(4);
8497 MachineMemOperand *MMO =
8498 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8499 SDValue Ops[] = { Chain, Tmp, FIPtr };
8500 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8501 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8502 } else
8503 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8504
8505 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8506 // add in a bias on big endian.
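  // (For example, with an 8-byte f64 slot on a big-endian target, the 32-bit
  // result produced by fctiwz occupies bytes 4..7 of the slot, hence the +4
  // offset added below.)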
8507 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8508 !Subtarget.isLittleEndian()) {
8509 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8510 DAG.getConstant(4, dl, FIPtr.getValueType()));
8511 MPI = MPI.getWithOffset(4);
8512 }
8513
8514 RLI.Chain = Chain;
8515 RLI.Ptr = FIPtr;
8516 RLI.MPI = MPI;
8517 RLI.Alignment = Alignment;
8518}
8519
8520/// Custom lowers floating point to integer conversions to use
8521/// the direct move instructions available in ISA 2.07 to avoid the
8522/// need for load/store combinations.
8523SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8524 SelectionDAG &DAG,
8525 const SDLoc &dl) const {
8526 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8527 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8528 if (Op->isStrictFPOpcode())
8529 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8530 else
8531 return Mov;
8532}
8533
8534SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8535 const SDLoc &dl) const {
8536 bool IsStrict = Op->isStrictFPOpcode();
8537 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8538 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8539 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8540 EVT SrcVT = Src.getValueType();
8541 EVT DstVT = Op.getValueType();
8542
8543 // FP to INT conversions are legal for f128.
8544 if (SrcVT == MVT::f128)
8545 return Subtarget.hasP9Vector() ? Op : SDValue();
8546
8547 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8548 // PPC (the libcall is not available).
8549 if (SrcVT == MVT::ppcf128) {
8550 if (DstVT == MVT::i32) {
8551 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8552 // set other fast-math flags to FP operations in both strict and
8553 // non-strict cases. (FP_TO_SINT, FSUB)
8554      SDNodeFlags Flags;
8555      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8556
8557 if (IsSigned) {
8558 SDValue Lo, Hi;
8559 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8560
8561 // Add the two halves of the long double in round-to-zero mode, and use
8562 // a smaller FP_TO_SINT.
8563 if (IsStrict) {
8564          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8565                                    DAG.getVTList(MVT::f64, MVT::Other),
8566 {Op.getOperand(0), Lo, Hi}, Flags);
8567 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8568 DAG.getVTList(MVT::i32, MVT::Other),
8569 {Res.getValue(1), Res}, Flags);
8570 } else {
8571 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8572 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8573 }
8574 } else {
8575 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8576 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8577 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8578 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8579 if (IsStrict) {
8580 // Sel = Src < 0x80000000
8581 // FltOfs = select Sel, 0.0, 0x80000000
8582 // IntOfs = select Sel, 0, 0x80000000
8583 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
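          // Worked example (a sketch): for Src = 3.0e9 (>= 2^31) the compare is
          // false, so FltOfs = 2^31 and IntOfs = 0x80000000; then
          // fp_to_sint(3.0e9 - 2^31) = 852516352 and
          // 852516352 ^ 0x80000000 = 3000000000 as an unsigned 32-bit value.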
8584 SDValue Chain = Op.getOperand(0);
8585 EVT SetCCVT =
8586 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8587 EVT DstSetCCVT =
8588 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8589 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8590 Chain, true);
8591 Chain = Sel.getValue(1);
8592
8593 SDValue FltOfs = DAG.getSelect(
8594 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8595 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8596
8597 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8598 DAG.getVTList(SrcVT, MVT::Other),
8599 {Chain, Src, FltOfs}, Flags);
8600 Chain = Val.getValue(1);
8601 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8602 DAG.getVTList(DstVT, MVT::Other),
8603 {Chain, Val}, Flags);
8604 Chain = SInt.getValue(1);
8605 SDValue IntOfs = DAG.getSelect(
8606 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8607 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8608 return DAG.getMergeValues({Result, Chain}, dl);
8609 } else {
8610 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8611 // FIXME: generated code sucks.
8612 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8613 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8614 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8615 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8616 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8617 }
8618 }
8619 }
8620
8621 return SDValue();
8622 }
8623
8624 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8625 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8626
8627 ReuseLoadInfo RLI;
8628 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8629
8630 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8631 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8632}
8633
8634// We're trying to insert a regular store, S, and then a load, L. If the
8635// incoming value, O, is a load, we might just be able to have our load use the
8636// address used by O. However, we don't know if anything else will store to
8637// that address before we can load from it. To prevent this situation, we need
8638// to insert our load, L, into the chain as a peer of O. To do this, we give L
8639// the same chain operand as O, we create a token factor from the chain results
8640// of O and L, and we replace all uses of O's chain result with that token
8641// factor (this last part is handled by makeEquivalentMemoryOrdering).
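// Schematically (a sketch, not actual node names):
//   Before: O.chain -> users(O.chain)
//   After:  L is created with O's chain operand, and
//           TF = TokenFactor(O.chain, L.chain) replaces all uses of O.chain,
// so stores that were chained after O are now also ordered after L.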
8642bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8643 ReuseLoadInfo &RLI,
8644 SelectionDAG &DAG,
8645 ISD::LoadExtType ET) const {
8646 // Conservatively skip reusing for constrained FP nodes.
8647 if (Op->isStrictFPOpcode())
8648 return false;
8649
8650 SDLoc dl(Op);
8651 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8652 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8653 if (ET == ISD::NON_EXTLOAD &&
8654 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8655 isOperationLegalOrCustom(Op.getOpcode(),
8656 Op.getOperand(0).getValueType())) {
8657
8658 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8659 return true;
8660 }
8661
8662 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8663 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8664 LD->isNonTemporal())
8665 return false;
8666 if (LD->getMemoryVT() != MemVT)
8667 return false;
8668
8669  // If the result of the load is an illegal type, then we can't build a
8670  // valid chain for reuse since the legalised loads and the token factor node
8671  // that ties them together use a different output chain than the
8672  // illegal load.
8673 if (!isTypeLegal(LD->getValueType(0)))
8674 return false;
8675
8676 RLI.Ptr = LD->getBasePtr();
8677 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8678 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8679 "Non-pre-inc AM on PPC?");
8680 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8681 LD->getOffset());
8682 }
8683
8684 RLI.Chain = LD->getChain();
8685 RLI.MPI = LD->getPointerInfo();
8686 RLI.IsDereferenceable = LD->isDereferenceable();
8687 RLI.IsInvariant = LD->isInvariant();
8688 RLI.Alignment = LD->getAlign();
8689 RLI.AAInfo = LD->getAAInfo();
8690 RLI.Ranges = LD->getRanges();
8691
8692 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8693 return true;
8694}
8695
8696/// Analyze the profitability of a direct move: prefer a floating-point load
8697/// over an integer load plus a direct move when the loaded integer value has
8698/// no uses other than int-to-FP conversions.
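/// For example, for "double d = (double)*p;" where *p is an i32 whose only use
/// is the conversion, an lfiwax places the value in a VSR directly; an integer
/// load would add an mtvsr* direct move on the critical path.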
8699bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8700 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8701 if (Origin->getOpcode() != ISD::LOAD)
8702 return true;
8703
8704 // If there is no LXSIBZX/LXSIHZX, like Power8,
8705 // prefer direct move if the memory size is 1 or 2 bytes.
8706 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8707 if (!Subtarget.hasP9Vector() &&
8708 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8709 return true;
8710
8711 for (SDUse &Use : Origin->uses()) {
8712
8713 // Only look at the users of the loaded value.
8714 if (Use.getResNo() != 0)
8715 continue;
8716
8717 SDNode *User = Use.getUser();
8718 if (User->getOpcode() != ISD::SINT_TO_FP &&
8719 User->getOpcode() != ISD::UINT_TO_FP &&
8720 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8721 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8722 return true;
8723 }
8724
8725 return false;
8726}
8727
8728static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8729                              const PPCSubtarget &Subtarget,
8730 SDValue Chain = SDValue()) {
8731 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8732 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8733 SDLoc dl(Op);
8734
8735 // TODO: Any other flags to propagate?
8736 SDNodeFlags Flags;
8737 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8738
8739 // If we have FCFIDS, then use it when converting to single-precision.
8740 // Otherwise, convert to double-precision and then round.
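  // Roughly: i64 -> f32 with FPCVT uses fcfids/fcfidus; i64 -> f64 uses
  // fcfid/fcfidu; without FPCVT only fcfid is available and the caller adds an
  // explicit FP_ROUND to reach f32.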
8741 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8742 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8743 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8744 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8745 if (Op->isStrictFPOpcode()) {
8746 if (!Chain)
8747 Chain = Op.getOperand(0);
8748 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8749 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8750 } else
8751 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8752}
8753
8754/// Custom lowers integer to floating point conversions to use
8755/// the direct move instructions available in ISA 2.07 to avoid the
8756/// need for load/store combinations.
8757SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8758 SelectionDAG &DAG,
8759 const SDLoc &dl) const {
8760 assert((Op.getValueType() == MVT::f32 ||
8761 Op.getValueType() == MVT::f64) &&
8762 "Invalid floating point type as target of conversion");
8763 assert(Subtarget.hasFPCVT() &&
8764 "Int to FP conversions with direct moves require FPCVT");
8765 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8766 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8767 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8768 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8769 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8770 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8771 return convertIntToFP(Op, Mov, DAG, Subtarget);
8772}
8773
8774static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8775
8776 EVT VecVT = Vec.getValueType();
8777 assert(VecVT.isVector() && "Expected a vector type.");
8778 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8779
8780 EVT EltVT = VecVT.getVectorElementType();
8781 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8782 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8783
8784 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8785 SmallVector<SDValue, 16> Ops(NumConcat);
8786 Ops[0] = Vec;
8787 SDValue UndefVec = DAG.getUNDEF(VecVT);
8788 for (unsigned i = 1; i < NumConcat; ++i)
8789 Ops[i] = UndefVec;
8790
8791 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8792}
8793
8794SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8795 const SDLoc &dl) const {
8796 bool IsStrict = Op->isStrictFPOpcode();
8797 unsigned Opc = Op.getOpcode();
8798 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8801 "Unexpected conversion type");
8802 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8803 "Supports conversions to v2f64/v4f32 only.");
8804
8805 // TODO: Any other flags to propagate?
8806  SDNodeFlags Flags;
8807  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8808
8809 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8810 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8811
8812 SDValue Wide = widenVec(DAG, Src, dl);
8813 EVT WideVT = Wide.getValueType();
8814 unsigned WideNumElts = WideVT.getVectorNumElements();
8815 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8816
8817 SmallVector<int, 16> ShuffV;
8818 for (unsigned i = 0; i < WideNumElts; ++i)
8819 ShuffV.push_back(i + WideNumElts);
8820
8821 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8822 int SaveElts = FourEltRes ? 4 : 2;
8823 if (Subtarget.isLittleEndian())
8824 for (int i = 0; i < SaveElts; i++)
8825 ShuffV[i * Stride] = i;
8826 else
8827 for (int i = 1; i <= SaveElts; i++)
8828 ShuffV[i * Stride - 1] = i - 1;
8829
8830 SDValue ShuffleSrc2 =
8831 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8832 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8833
8834 SDValue Extend;
8835 if (SignedConv) {
8836 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8837 EVT ExtVT = Src.getValueType();
8838 if (Subtarget.hasP9Altivec())
8839 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8840 IntermediateVT.getVectorNumElements());
8841
8842 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8843 DAG.getValueType(ExtVT));
8844 } else
8845 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8846
8847 if (IsStrict)
8848 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8849 {Op.getOperand(0), Extend}, Flags);
8850
8851 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8852}
8853
8854SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8855 SelectionDAG &DAG) const {
8856 SDLoc dl(Op);
8857 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8858 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8859 bool IsStrict = Op->isStrictFPOpcode();
8860 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8861 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8862
8863 // TODO: Any other flags to propagate?
8864  SDNodeFlags Flags;
8865  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8866
8867 EVT InVT = Src.getValueType();
8868 EVT OutVT = Op.getValueType();
8869 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8870 isOperationCustom(Op.getOpcode(), InVT))
8871 return LowerINT_TO_FPVector(Op, DAG, dl);
8872
8873 // Conversions to f128 are legal.
8874 if (Op.getValueType() == MVT::f128)
8875 return Subtarget.hasP9Vector() ? Op : SDValue();
8876
8877 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8878 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8879 return SDValue();
8880
8881 if (Src.getValueType() == MVT::i1) {
8882 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8883 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8884 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8885 if (IsStrict)
8886 return DAG.getMergeValues({Sel, Chain}, dl);
8887 else
8888 return Sel;
8889 }
8890
8891 // If we have direct moves, we can do all the conversion, skip the store/load
8892 // however, without FPCVT we can't do most conversions.
8893 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8894 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8895 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8896
8897 assert((IsSigned || Subtarget.hasFPCVT()) &&
8898 "UINT_TO_FP is supported only with FPCVT");
8899
8900 if (Src.getValueType() == MVT::i64) {
8901 SDValue SINT = Src;
8902 // When converting to single-precision, we actually need to convert
8903 // to double-precision first and then round to single-precision.
8904 // To avoid double-rounding effects during that operation, we have
8905 // to prepare the input operand. Bits that might be truncated when
8906 // converting to double-precision are replaced by a bit that won't
8907 // be lost at this stage, but is below the single-precision rounding
8908 // position.
8909 //
8910 // However, if -enable-unsafe-fp-math is in effect, accept double
8911 // rounding to avoid the extra overhead.
8912 if (Op.getValueType() == MVT::f32 &&
8913 !Subtarget.hasFPCVT() &&
8915
8916 // Twiddle input to make sure the low 11 bits are zero. (If this
8917 // is the case, we are guaranteed the value will fit into the 53 bit
8918 // mantissa of an IEEE double-precision value without rounding.)
8919 // If any of those low 11 bits were not zero originally, make sure
8920 // bit 12 (value 2048) is set instead, so that the final rounding
8921 // to single-precision gets the correct result.
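      // Worked through: Round = (SINT & 2047) + 2047 sets bit 11 iff any of the
      // low 11 bits of SINT were set; OR-ing with SINT and then masking with
      // -2048 clears bits 0..10, leaving bit 11 as a "sticky" summary of the
      // discarded bits.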
8922 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8923 SINT, DAG.getConstant(2047, dl, MVT::i64));
8924 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8925 Round, DAG.getConstant(2047, dl, MVT::i64));
8926 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8927 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8928 DAG.getSignedConstant(-2048, dl, MVT::i64));
8929
8930 // However, we cannot use that value unconditionally: if the magnitude
8931 // of the input value is small, the bit-twiddling we did above might
8932 // end up visibly changing the output. Fortunately, in that case, we
8933 // don't need to twiddle bits since the original input will convert
8934 // exactly to double-precision floating-point already. Therefore,
8935 // construct a conditional to use the original value if the top 11
8936 // bits are all sign-bit copies, and use the rounded value computed
8937 // above otherwise.
8938 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8939 SINT, DAG.getConstant(53, dl, MVT::i32));
8940 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8941 Cond, DAG.getConstant(1, dl, MVT::i64));
8942 Cond = DAG.getSetCC(
8943 dl,
8944 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8945 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8946
8947 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8948 }
8949
8950 ReuseLoadInfo RLI;
8951 SDValue Bits;
8952
8953    MachineFunction &MF = DAG.getMachineFunction();
8954    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8955 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8956 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8957 if (RLI.ResChain)
8958 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8959 } else if (Subtarget.hasLFIWAX() &&
8960 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8961 MachineMemOperand *MMO =
8962          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8963                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8964 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8965      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8966                                     DAG.getVTList(MVT::f64, MVT::Other),
8967 Ops, MVT::i32, MMO);
8968 if (RLI.ResChain)
8969 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8970 } else if (Subtarget.hasFPCVT() &&
8971 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8972 MachineMemOperand *MMO =
8973          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8974                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8975 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8976      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8977                                     DAG.getVTList(MVT::f64, MVT::Other),
8978 Ops, MVT::i32, MMO);
8979 if (RLI.ResChain)
8980 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8981 } else if (((Subtarget.hasLFIWAX() &&
8982 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8983 (Subtarget.hasFPCVT() &&
8984 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8985 SINT.getOperand(0).getValueType() == MVT::i32) {
8986 MachineFrameInfo &MFI = MF.getFrameInfo();
8987 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8988
8989 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8990 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8991
8992 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8993                                 MachinePointerInfo::getFixedStack(
8994                                     DAG.getMachineFunction(), FrameIdx));
8995 Chain = Store;
8996
8997 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8998 "Expected an i32 store");
8999
9000 RLI.Ptr = FIdx;
9001 RLI.Chain = Chain;
9002 RLI.MPI =
9003        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9004    RLI.Alignment = Align(4);
9005
9006 MachineMemOperand *MMO =
9007        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9008                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9009 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9010    Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND
9011                                       ? PPCISD::LFIWZX : PPCISD::LFIWAX,
9012                                   dl, DAG.getVTList(MVT::f64, MVT::Other),
9013 Ops, MVT::i32, MMO);
9014 Chain = Bits.getValue(1);
9015 } else
9016 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
9017
9018 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
9019 if (IsStrict)
9020 Chain = FP.getValue(1);
9021
9022 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9023 if (IsStrict)
9024 FP = DAG.getNode(
9025 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9026 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
9027 Flags);
9028 else
9029 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9030 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9031 }
9032 return FP;
9033 }
9034
9035 assert(Src.getValueType() == MVT::i32 &&
9036 "Unhandled INT_TO_FP type in custom expander!");
9037 // Since we only generate this in 64-bit mode, we can take advantage of
9038 // 64-bit registers. In particular, sign extend the input value into the
9039  // 64-bit register with extsw, store the whole 64-bit value onto the stack,
9040  // then lfd and fcfid it.
9041  MachineFunction &MF = DAG.getMachineFunction();
9042  MachineFrameInfo &MFI = MF.getFrameInfo();
9043 EVT PtrVT = getPointerTy(MF.getDataLayout());
9044
9045 SDValue Ld;
9046 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9047 ReuseLoadInfo RLI;
9048 bool ReusingLoad;
9049 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9050 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9051 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9052
9053 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9054                                   MachinePointerInfo::getFixedStack(
9055                                       DAG.getMachineFunction(), FrameIdx));
9056 Chain = Store;
9057
9058 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9059 "Expected an i32 store");
9060
9061 RLI.Ptr = FIdx;
9062 RLI.Chain = Chain;
9063 RLI.MPI =
9064          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9065      RLI.Alignment = Align(4);
9066 }
9067
9068 MachineMemOperand *MMO =
9069        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9070                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9071 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9072 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9073 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9074 MVT::i32, MMO);
9075 Chain = Ld.getValue(1);
9076 if (ReusingLoad && RLI.ResChain) {
9077 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
9078 }
9079 } else {
9080 assert(Subtarget.isPPC64() &&
9081 "i32->FP without LFIWAX supported only on PPC64");
9082
9083 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9084 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9085
9086 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9087
9088 // STD the extended value into the stack slot.
9089 SDValue Store = DAG.getStore(
9090 Chain, dl, Ext64, FIdx,
9091        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9092    Chain = Store;
9093
9094 // Load the value as a double.
9095 Ld = DAG.getLoad(
9096 MVT::f64, dl, Chain, FIdx,
9097        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9098    Chain = Ld.getValue(1);
9099 }
9100
9101 // FCFID it and return it.
9102 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9103 if (IsStrict)
9104 Chain = FP.getValue(1);
9105 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9106 if (IsStrict)
9107 FP = DAG.getNode(
9108 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9109 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9110 else
9111 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9112 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9113 }
9114 return FP;
9115}
9116
9117SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9118 SelectionDAG &DAG) const {
9119 SDLoc Dl(Op);
9120  MachineFunction &MF = DAG.getMachineFunction();
9121  EVT PtrVT = getPointerTy(MF.getDataLayout());
9122 SDValue Chain = Op.getOperand(0);
9123
9124 // If requested mode is constant, just use simpler mtfsb/mffscrni
9125 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9126 uint64_t Mode = CVal->getZExtValue();
9127 assert(Mode < 4 && "Unsupported rounding mode!");
9128 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9129 if (Subtarget.isISA3_0())
9130 return SDValue(
9131 DAG.getMachineNode(
9132 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9133 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9134 1);
9135 SDNode *SetHi = DAG.getMachineNode(
9136 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9137 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9138 SDNode *SetLo = DAG.getMachineNode(
9139 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9140 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9141 return SDValue(SetLo, 0);
9142 }
9143
9144 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
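  // Concretely: LLVM 0 (toward zero) -> RN 0b01, 1 (to nearest) -> 0b00,
  // 2 (toward +inf) -> 0b10, 3 (toward -inf) -> 0b11.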
9145 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9146 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9147 DAG.getConstant(3, Dl, MVT::i32));
9148 SDValue DstFlag = DAG.getNode(
9149 ISD::XOR, Dl, MVT::i32, SrcFlag,
9150 DAG.getNode(ISD::AND, Dl, MVT::i32,
9151 DAG.getNOT(Dl,
9152 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9153 MVT::i32),
9154 One));
9155  // For Power9, there's a faster mffscrn, and we don't need to read the FPSCR.
9156 SDValue MFFS;
9157 if (!Subtarget.isISA3_0()) {
9158 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9159 Chain = MFFS.getValue(1);
9160 }
9161 SDValue NewFPSCR;
9162 if (Subtarget.isPPC64()) {
9163 if (Subtarget.isISA3_0()) {
9164 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9165 } else {
9166 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9167 SDNode *InsertRN = DAG.getMachineNode(
9168 PPC::RLDIMI, Dl, MVT::i64,
9169 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9170 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9171 DAG.getTargetConstant(0, Dl, MVT::i32),
9172 DAG.getTargetConstant(62, Dl, MVT::i32)});
9173 NewFPSCR = SDValue(InsertRN, 0);
9174 }
9175 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9176 } else {
9177 // In 32-bit mode, store f64, load and update the lower half.
9178 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9179 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9180 SDValue Addr = Subtarget.isLittleEndian()
9181 ? StackSlot
9182 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9183 DAG.getConstant(4, Dl, PtrVT));
9184 if (Subtarget.isISA3_0()) {
9185 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9186 } else {
9187 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9188 SDValue Tmp =
9189 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9190 Chain = Tmp.getValue(1);
9191 Tmp = SDValue(DAG.getMachineNode(
9192 PPC::RLWIMI, Dl, MVT::i32,
9193 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9194 DAG.getTargetConstant(30, Dl, MVT::i32),
9195 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9196 0);
9197 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9198 }
9199 NewFPSCR =
9200 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9201 Chain = NewFPSCR.getValue(1);
9202 }
9203 if (Subtarget.isISA3_0())
9204 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9205 {NewFPSCR, Chain}),
9206 1);
9207 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9208 SDNode *MTFSF = DAG.getMachineNode(
9209 PPC::MTFSF, Dl, MVT::Other,
9210 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9211 return SDValue(MTFSF, 0);
9212}
9213
9214SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9215 SelectionDAG &DAG) const {
9216 SDLoc dl(Op);
9217 /*
9218  The rounding mode is in bits 30:31 of FPSCR, and has the following
9219 settings:
9220 00 Round to nearest
9221 01 Round to 0
9222 10 Round to +inf
9223 11 Round to -inf
9224
9225 GET_ROUNDING, on the other hand, expects the following:
9226 -1 Undefined
9227 0 Round to 0
9228 1 Round to nearest
9229 2 Round to +inf
9230 3 Round to -inf
9231
9232 To perform the conversion, we do:
9233 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9234 */
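  // For example, RN = 0b01 (round toward zero) gives
  //   (1 & 0x3) ^ ((~1 & 0x3) >> 1) = 1 ^ 1 = 0,
  // matching GET_ROUNDING's "0 = Round to 0".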
9235
9236  MachineFunction &MF = DAG.getMachineFunction();
9237  EVT VT = Op.getValueType();
9238 EVT PtrVT = getPointerTy(MF.getDataLayout());
9239
9240 // Save FP Control Word to register
9241 SDValue Chain = Op.getOperand(0);
9242 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9243 Chain = MFFS.getValue(1);
9244
9245 SDValue CWD;
9246 if (isTypeLegal(MVT::i64)) {
9247 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9248 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9249 } else {
9250 // Save FP register to stack slot
9251 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9252 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9253 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9254
9255 // Load FP Control Word from low 32 bits of stack slot.
9257 "Stack slot adjustment is valid only on big endian subtargets!");
9258 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9259 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9260 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9261 Chain = CWD.getValue(1);
9262 }
9263
9264 // Transform as necessary
9265 SDValue CWD1 =
9266 DAG.getNode(ISD::AND, dl, MVT::i32,
9267 CWD, DAG.getConstant(3, dl, MVT::i32));
9268 SDValue CWD2 =
9269 DAG.getNode(ISD::SRL, dl, MVT::i32,
9270 DAG.getNode(ISD::AND, dl, MVT::i32,
9271 DAG.getNode(ISD::XOR, dl, MVT::i32,
9272 CWD, DAG.getConstant(3, dl, MVT::i32)),
9273 DAG.getConstant(3, dl, MVT::i32)),
9274 DAG.getConstant(1, dl, MVT::i32));
9275
9276 SDValue RetVal =
9277 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9278
9279 RetVal =
9280      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9281                  dl, VT, RetVal);
9282
9283 return DAG.getMergeValues({RetVal, Chain}, dl);
9284}
9285
9286SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9287 EVT VT = Op.getValueType();
9288  unsigned BitWidth = VT.getSizeInBits();
9289  SDLoc dl(Op);
9290 assert(Op.getNumOperands() == 3 &&
9291 VT == Op.getOperand(1).getValueType() &&
9292 "Unexpected SHL!");
9293
9294 // Expand into a bunch of logical ops. Note that these ops
9295 // depend on the PPC behavior for oversized shift amounts.
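  // Concretely, slw/sld yield 0 once the 6- (or 7-) bit shift amount reaches
  // the register width, so of the three terms OR-ed into OutHi below, the
  // Lo << (Amt - BW) term only contributes when Amt >= BW and the other two
  // only contribute when Amt <= BW, giving the right result for any Amt in
  // [0, 2*BW).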
9296 SDValue Lo = Op.getOperand(0);
9297 SDValue Hi = Op.getOperand(1);
9298 SDValue Amt = Op.getOperand(2);
9299 EVT AmtVT = Amt.getValueType();
9300
9301 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9302 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9303 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9304 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9305 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9306 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9307 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9308 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9309 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9310 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9311 SDValue OutOps[] = { OutLo, OutHi };
9312 return DAG.getMergeValues(OutOps, dl);
9313}
9314
9315SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9316 EVT VT = Op.getValueType();
9317 SDLoc dl(Op);
9318  unsigned BitWidth = VT.getSizeInBits();
9319  assert(Op.getNumOperands() == 3 &&
9320 VT == Op.getOperand(1).getValueType() &&
9321 "Unexpected SRL!");
9322
9323 // Expand into a bunch of logical ops. Note that these ops
9324 // depend on the PPC behavior for oversized shift amounts.
9325 SDValue Lo = Op.getOperand(0);
9326 SDValue Hi = Op.getOperand(1);
9327 SDValue Amt = Op.getOperand(2);
9328 EVT AmtVT = Amt.getValueType();
9329
9330 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9331 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9332 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9333 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9334 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9335 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9336 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9337 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9338 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9339 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9340 SDValue OutOps[] = { OutLo, OutHi };
9341 return DAG.getMergeValues(OutOps, dl);
9342}
9343
9344SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9345 SDLoc dl(Op);
9346 EVT VT = Op.getValueType();
9347  unsigned BitWidth = VT.getSizeInBits();
9348  assert(Op.getNumOperands() == 3 &&
9349 VT == Op.getOperand(1).getValueType() &&
9350 "Unexpected SRA!");
9351
9352 // Expand into a bunch of logical ops, followed by a select_cc.
9353 SDValue Lo = Op.getOperand(0);
9354 SDValue Hi = Op.getOperand(1);
9355 SDValue Amt = Op.getOperand(2);
9356 EVT AmtVT = Amt.getValueType();
9357
9358 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9359 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9360 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9361 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9362 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9363 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9364 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9365 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9366 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9367 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9368 Tmp4, Tmp6, ISD::SETLE);
9369 SDValue OutOps[] = { OutLo, OutHi };
9370 return DAG.getMergeValues(OutOps, dl);
9371}
9372
9373SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9374 SelectionDAG &DAG) const {
9375 SDLoc dl(Op);
9376 EVT VT = Op.getValueType();
9377 unsigned BitWidth = VT.getSizeInBits();
9378
9379 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9380 SDValue X = Op.getOperand(0);
9381 SDValue Y = Op.getOperand(1);
9382 SDValue Z = Op.getOperand(2);
9383 EVT AmtVT = Z.getValueType();
9384
9385 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9386 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9387 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9388 // on PowerPC shift by BW being well defined.
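  // For example, "fshl i32 %x, %y, 8" becomes (%x << 8) | (%y >> 24); when
  // Z % BW == 0 the complementary shift amount is BW, which PPC shifts define
  // to produce 0, so the result degenerates to X (fshl) or Y (fshr).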
9389 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9390 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9391 SDValue SubZ =
9392 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9393 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9394 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9395 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9396}
9397
9398//===----------------------------------------------------------------------===//
9399// Vector related lowering.
9400//
9401
9402/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9403/// element size of SplatSize. Cast the result to VT.
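/// For example, an all-ones v4i32 splat is emitted as a v16i8 splat of 0xFF
/// (a single vspltisb -1) and then bitcast back to the requested type.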
9404static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9405 SelectionDAG &DAG, const SDLoc &dl) {
9406 static const MVT VTys[] = { // canonical VT to use for each size.
9407 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9408 };
9409
9410 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9411
9412  // For a splat with all ones, turn it into a vspltisb of 0xFF to canonicalize.
9413 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9414 SplatSize = 1;
9415 Val = 0xFF;
9416 }
9417
9418 EVT CanonicalVT = VTys[SplatSize-1];
9419
9420 // Build a canonical splat for this value.
9421 // Explicitly truncate APInt here, as this API is used with a mix of
9422 // signed and unsigned values.
9423 return DAG.getBitcast(
9424 ReqVT,
9425 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9426}
9427
9428/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9429/// specified intrinsic ID.
9430static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9431                                const SDLoc &dl, EVT DestVT = MVT::Other) {
9432 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9433 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9434 DAG.getConstant(IID, dl, MVT::i32), Op);
9435}
9436
9437/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9438/// specified intrinsic ID.
9439static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9440 SelectionDAG &DAG, const SDLoc &dl,
9441 EVT DestVT = MVT::Other) {
9442 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9443 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9444 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9445}
9446
9447/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9448/// specified intrinsic ID.
9449static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9450 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9451 EVT DestVT = MVT::Other) {
9452 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9453 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9454 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9455}
9456
9457/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9458/// amount. The result has the specified value type.
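/// For example, Amt == 4 yields the byte shuffle mask <4, 5, ..., 19>, i.e.
/// bytes 4..15 of LHS followed by bytes 0..3 of RHS, matching vsldoi's
/// concatenate-and-shift-left semantics.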
9459static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9460 SelectionDAG &DAG, const SDLoc &dl) {
9461 // Force LHS/RHS to be the right type.
9462 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9463 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9464
9465 int Ops[16];
9466 for (unsigned i = 0; i != 16; ++i)
9467 Ops[i] = i + Amt;
9468 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9469 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9470}
9471
9472/// Do we have an efficient pattern in a .td file for this node?
9473///
9474/// \param V - pointer to the BuildVectorSDNode being matched
9475/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9476///
9477/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9478/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9479/// the opposite is true (expansion is beneficial) are:
9480/// - The node builds a vector out of integers that are not 32 or 64-bits
9481/// - The node builds a vector out of constants
9482/// - The node is a "load-and-splat"
9483/// In all other cases, we will choose to keep the BUILD_VECTOR.
9484static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9485                                            bool HasDirectMove,
9486 bool HasP8Vector) {
9487 EVT VecVT = V->getValueType(0);
9488 bool RightType = VecVT == MVT::v2f64 ||
9489 (HasP8Vector && VecVT == MVT::v4f32) ||
9490 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9491 if (!RightType)
9492 return false;
9493
9494 bool IsSplat = true;
9495 bool IsLoad = false;
9496 SDValue Op0 = V->getOperand(0);
9497
9498 // This function is called in a block that confirms the node is not a constant
9499 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9500 // different constants.
9501 if (V->isConstant())
9502 return false;
9503 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9504 if (V->getOperand(i).isUndef())
9505 return false;
9506 // We want to expand nodes that represent load-and-splat even if the
9507 // loaded value is a floating point truncation or conversion to int.
9508 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9509 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9510 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9511 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9512 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9513 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9514 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9515 IsLoad = true;
9516 // If the operands are different or the input is not a load and has more
9517 // uses than just this BV node, then it isn't a splat.
9518 if (V->getOperand(i) != Op0 ||
9519 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9520 IsSplat = false;
9521 }
9522 return !(IsSplat && IsLoad);
9523}
9524
9525// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9526SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9527
9528 SDLoc dl(Op);
9529 SDValue Op0 = Op->getOperand(0);
9530
9531 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9532 (Op.getValueType() != MVT::f128))
9533 return SDValue();
9534
9535 SDValue Lo = Op0.getOperand(0);
9536 SDValue Hi = Op0.getOperand(1);
9537 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9538 return SDValue();
9539
9540 if (!Subtarget.isLittleEndian())
9541 std::swap(Lo, Hi);
9542
9543 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9544}
9545
9546static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9547 const SDValue *InputLoad = &Op;
9548 while (InputLoad->getOpcode() == ISD::BITCAST)
9549 InputLoad = &InputLoad->getOperand(0);
9550 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9551      InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9552    IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9553 InputLoad = &InputLoad->getOperand(0);
9554 }
9555 if (InputLoad->getOpcode() != ISD::LOAD)
9556 return nullptr;
9557 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9558 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9559}
9560
9561// Convert the argument APFloat to a single precision APFloat if there is no
9562// loss in information during the conversion to single precision APFloat and the
9563// resulting number is not a denormal number. Return true if successful.
9564bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9565  APFloat APFloatToConvert = ArgAPFloat;
9566 bool LosesInfo = true;
9567  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9568                           &LosesInfo);
9569 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9570 if (Success)
9571 ArgAPFloat = APFloatToConvert;
9572 return Success;
9573}
9574
9575// Bitcast the argument APInt to a double and convert it to a single precision
9576// APFloat, bitcast the APFloat to an APInt and assign it to the original
9577// argument if there is no loss in information during the conversion from
9578// double to single precision APFloat and the resulting number is not a denormal
9579// number. Return true if successful.
9580 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9581   double DpValue = ArgAPInt.bitsToDouble();
9582 APFloat APFloatDp(DpValue);
9583 bool Success = convertToNonDenormSingle(APFloatDp);
9584 if (Success)
9585 ArgAPInt = APFloatDp.bitcastToAPInt();
9586 return Success;
9587}
9588
9589 // Nondestructive check for convertToNonDenormSingle.
9590 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9591   // Only convert if it loses info, since XXSPLTIDP should
9592   // handle the other case.
9593   APFloat APFloatToConvert = ArgAPFloat;
9594   bool LosesInfo = true;
9595   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9596                            &LosesInfo);
9597
9598 return (!LosesInfo && !APFloatToConvert.isDenormal());
9599}
9600
9601static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9602 unsigned &Opcode) {
9603 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9604 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9605 return false;
9606
9607 EVT Ty = Op->getValueType(0);
9608 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9609 // as we cannot handle extending loads for these types.
9610 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9611 ISD::isNON_EXTLoad(InputNode))
9612 return true;
9613
9614 EVT MemVT = InputNode->getMemoryVT();
9615 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9616 // memory VT is the same vector element VT type.
9617 // The loads feeding into the v8i16 and v16i8 types will be extending because
9618 // scalar i8/i16 are not legal types.
9619 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9620 (MemVT == Ty.getVectorElementType()))
9621 return true;
9622
9623 if (Ty == MVT::v2i64) {
9624 // Check the extend type, when the input type is i32, and the output vector
9625 // type is v2i64.
9626 if (MemVT == MVT::i32) {
9627 if (ISD::isZEXTLoad(InputNode))
9628 Opcode = PPCISD::ZEXT_LD_SPLAT;
9629 if (ISD::isSEXTLoad(InputNode))
9630 Opcode = PPCISD::SEXT_LD_SPLAT;
9631 }
9632 return true;
9633 }
9634 return false;
9635}
9636
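// Illustrative summary of the helper below: every byte of the BUILD_VECTOR's
// 128-bit constant must be either 0x00 or 0xFF, and each 0xFF byte sets the
// corresponding bit of BitMask. For example, a v16i8 constant whose bytes
// alternate 0xFF, 0x00 is accepted, while any other byte value (say 0x7F)
// makes the check fail.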
9637 static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9638                             bool IsLittleEndian) {
9639 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9640
9641 BitMask.clearAllBits();
9642 EVT VT = BVN.getValueType(0);
9643 unsigned VTSize = VT.getSizeInBits();
9644 APInt ConstValue(VTSize, 0);
9645
9646 unsigned EltWidth = VT.getScalarSizeInBits();
9647
9648 unsigned BitPos = 0;
9649 for (auto OpVal : BVN.op_values()) {
9650 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9651
9652 if (!CN)
9653 return false;
9654 // The elements in a vector register are ordered in reverse byte order
9655 // between little-endian and big-endian modes.
9656 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9657 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9658 BitPos += EltWidth;
9659 }
9660
9661 for (unsigned J = 0; J < 16; ++J) {
9662 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9663 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9664 return false;
9665 if (ExtractValue == 0xFF)
9666 BitMask.setBit(J);
9667 }
9668 return true;
9669}
9670
9671// If this is a case we can't handle, return null and let the default
9672// expansion code take care of it. If we CAN select this case, and if it
9673// selects to a single instruction, return Op. Otherwise, if we can codegen
9674// this case more efficiently than a constant pool load, lower it to the
9675// sequence of ops that should be used.
9676SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9677 SelectionDAG &DAG) const {
9678 SDLoc dl(Op);
9679 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9680 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9681
9682 if (Subtarget.hasP10Vector()) {
9683 APInt BitMask(32, 0);
9684 // If the value of the vector is all zeros or all ones,
9685 // we do not convert it to MTVSRBMI.
9686 // The xxleqv instruction sets a vector with all ones.
9687 // The xxlxor instruction sets a vector with all zeros.
9688 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9689 BitMask != 0 && BitMask != 0xffff) {
9690 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9691 MachineSDNode *MSDNode =
9692 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9693 SDValue SDV = SDValue(MSDNode, 0);
9694 EVT DVT = BVN->getValueType(0);
9695 EVT SVT = SDV.getValueType();
9696 if (SVT != DVT) {
9697 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9698 }
9699 return SDV;
9700 }
9701 }
9702 // Check if this is a splat of a constant value.
9703 APInt APSplatBits, APSplatUndef;
9704 unsigned SplatBitSize;
9705 bool HasAnyUndefs;
9706 bool BVNIsConstantSplat =
9707 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9708 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9709
9710 // If it is a splat of a double, check if we can shrink it to a 32 bit
9711 // non-denormal float which when converted back to double gives us the same
9712 // double. This is to exploit the XXSPLTIDP instruction.
9713 // If we lose precision, we use XXSPLTI32DX.
9714 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9715 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9716 // Check the type first to short-circuit so we don't modify APSplatBits if
9717 // this block isn't executed.
9718 if ((Op->getValueType(0) == MVT::v2f64) &&
9719 convertToNonDenormSingle(APSplatBits)) {
9720 SDValue SplatNode = DAG.getNode(
9721 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9722 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9723 return DAG.getBitcast(Op.getValueType(), SplatNode);
9724 } else {
9725 // We may lose precision, so we have to use XXSPLTI32DX.
9726
9727 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9728 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9729 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9730
9731 if (!Hi || !Lo)
9732 // If either load is 0, then we should generate XXLXOR to set to 0.
9733 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9734
9735 if (Hi)
9736 SplatNode = DAG.getNode(
9737 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9738 DAG.getTargetConstant(0, dl, MVT::i32),
9739 DAG.getTargetConstant(Hi, dl, MVT::i32));
9740
9741 if (Lo)
9742 SplatNode =
9743 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9744 DAG.getTargetConstant(1, dl, MVT::i32),
9745 DAG.getTargetConstant(Lo, dl, MVT::i32));
9746
9747 return DAG.getBitcast(Op.getValueType(), SplatNode);
9748 }
9749 }
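  // For illustration: a v2f64 splat of 1.0 (0x3FF0000000000000) shrinks
  // losslessly to the single-precision pattern 0x3F800000 and takes the
  // XXSPLTI_SP_TO_DP path above, while a splat of 0.1 loses precision and is
  // emitted as a pair of XXSPLTI32DX nodes writing the two 32-bit halves.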
9750
9751 bool IsSplat64 = false;
9752 uint64_t SplatBits = 0;
9753 int32_t SextVal = 0;
9754 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9755 SplatBits = APSplatBits.getZExtValue();
9756 if (SplatBitSize <= 32) {
9757 SextVal = SignExtend32(SplatBits, SplatBitSize);
9758 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9759 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9760 bool P9Vector = Subtarget.hasP9Vector();
9761 int32_t Hi = P9Vector ? 127 : 15;
9762 int32_t Lo = P9Vector ? -128 : -16;
9763 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9764 SextVal = static_cast<int32_t>(SplatBits);
9765 }
9766 }
9767
9768 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9769 unsigned NewOpcode = PPCISD::LD_SPLAT;
9770
9771 // Handle load-and-splat patterns as we have instructions that will do this
9772 // in one go.
9773 if (DAG.isSplatValue(Op, true) &&
9774 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9775 const SDValue *InputLoad = &Op.getOperand(0);
9776 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9777
9778 // If the input load is an extending load, it will be an i32 -> i64
9779 // extending load and isValidSplatLoad() will update NewOpcode.
9780 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9781 unsigned ElementSize =
9782 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9783
9784 assert(((ElementSize == 2 * MemorySize)
9785 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9786 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9787 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9788 "Unmatched element size and opcode!\n");
9789
9790 // Checking for a single use of this load, we have to check for vector
9791 // width (128 bits) / ElementSize uses (since each operand of the
9792     // BUILD_VECTOR is a separate use of the value).
9793 unsigned NumUsesOfInputLD = 128 / ElementSize;
9794 for (SDValue BVInOp : Op->ops())
9795 if (BVInOp.isUndef())
9796 NumUsesOfInputLD--;
9797
9798     // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9799 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9800 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9801     // 15", but isValidSplatLoad() currently returns true only when operand 0
9802     // of the BUILD_VECTOR is the load itself. So we will not get into trouble for
9803 // these cases.
9804 //
9805 // case 1 - lfiwzx/lfiwax
9806 // 1.1: load result is i32 and is sign/zero extend to i64;
9807 // 1.2: build a v2i64 vector type with above loaded value;
9808 // 1.3: the vector has only one value at index 0, others are all undef;
9809 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9810 if (NumUsesOfInputLD == 1 &&
9811 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9812 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9813 Subtarget.hasLFIWAX()))
9814 return SDValue();
9815
9816 // case 2 - lxvr[hb]x
9817 // 2.1: load result is at most i16;
9818 // 2.2: build a vector with above loaded value;
9819 // 2.3: the vector has only one value at index 0, others are all undef;
9820 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9821 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9822 Subtarget.isISA3_1() && ElementSize <= 16)
9823 return SDValue();
9824
9825 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9826 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9827 Subtarget.hasVSX()) {
9828 SDValue Ops[] = {
9829 LD->getChain(), // Chain
9830 LD->getBasePtr(), // Ptr
9831 DAG.getValueType(Op.getValueType()) // VT
9832 };
9833 SDValue LdSplt = DAG.getMemIntrinsicNode(
9834 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9835 LD->getMemoryVT(), LD->getMemOperand());
9836 // Replace all uses of the output chain of the original load with the
9837 // output chain of the new load.
9838 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9839 LdSplt.getValue(1));
9840 return LdSplt;
9841 }
9842 }
9843
9844 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9845 // 32-bits can be lowered to VSX instructions under certain conditions.
9846 // Without VSX, there is no pattern more efficient than expanding the node.
9847 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9848 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9849 Subtarget.hasP8Vector()))
9850 return Op;
9851 return SDValue();
9852 }
9853
9854 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9855 unsigned SplatSize = SplatBitSize / 8;
9856
9857 // First, handle single instruction cases.
9858
9859 // All zeros?
9860 if (SplatBits == 0) {
9861 // Canonicalize all zero vectors to be v4i32.
9862 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9863 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9864 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9865 }
9866 return Op;
9867 }
9868
9869 // We have XXSPLTIW for constant splats four bytes wide.
9870 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9871 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9872 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9873 // turned into a 4-byte splat of 0xABABABAB.
9874 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9875 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9876 Op.getValueType(), DAG, dl);
9877
9878 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9879 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9880 dl);
9881
9882 // We have XXSPLTIB for constant splats one byte wide.
9883 if (Subtarget.hasP9Vector() && SplatSize == 1)
9884 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9885 dl);
9886
9887 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9888 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9889 if (SextVal >= -16 && SextVal <= 15) {
9890 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9891 // generate a splat word with extend for size 8.
9892 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9893 SDValue Res =
9894 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9895 if (SplatSize != 8)
9896 return Res;
9897 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9898 }
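  // For illustration: a v4i32 splat of 5 is emitted as a single vspltisw 5,
  // while a v2i64 splat of 5 (when the 64-bit splat case is accepted above)
  // becomes vspltisw 5 followed by vupklsw.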
9899
9900 // Two instruction sequences.
9901
9902 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9903 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9904 SmallVector<SDValue, 16> Ops(16, C);
9905 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9906 unsigned IID;
9907 switch (SplatSize) {
9908 default:
9909 llvm_unreachable("Unexpected type for vector constant.");
9910 case 2:
9911 IID = Intrinsic::ppc_altivec_vupklsb;
9912 break;
9913 case 4:
9914 IID = Intrinsic::ppc_altivec_vextsb2w;
9915 break;
9916 case 8:
9917 IID = Intrinsic::ppc_altivec_vextsb2d;
9918 break;
9919 }
9920 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl);
9921 return DAG.getBitcast(Op->getValueType(0), Extend);
9922 }
9923 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9924
9925 // If this value is in the range [-32,30] and is even, use:
9926 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9927 // If this value is in the range [17,31] and is odd, use:
9928 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9929 // If this value is in the range [-31,-17] and is odd, use:
9930 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9931 // Note the last two are three-instruction sequences.
9932 if (SextVal >= -32 && SextVal <= 31) {
9933 // To avoid having these optimizations undone by constant folding,
9934 // we convert to a pseudo that will be expanded later into one of
9935 // the above forms.
9936 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9937 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9938 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9939 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9940 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9941 if (VT == Op.getValueType())
9942 return RetVal;
9943 else
9944 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9945 }
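  // For illustration of the expansion described above: SextVal == 30 later
  // expands as VSPLTI(15) + VSPLTI(15), and SextVal == 27 as
  // VSPLTI(11) - VSPLTI(-16), since 11 - (-16) == 27.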
9946
9947 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9948 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9949 // for fneg/fabs.
9950 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9951 // Make -1 and vspltisw -1:
9952 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9953
9954 // Make the VSLW intrinsic, computing 0x8000_0000.
9955 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9956 OnesV, DAG, dl);
9957
9958 // xor by OnesV to invert it.
9959 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9960 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9961 }
9962
9963 // Check to see if this is a wide variety of vsplti*, binop self cases.
9964 static const signed char SplatCsts[] = {
9965 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9966 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9967 };
9968
9969 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9970 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9971     // cases which are ambiguous (e.g. formation of 0x8000_0000).
9972 int i = SplatCsts[idx];
9973
9974 // Figure out what shift amount will be used by altivec if shifted by i in
9975 // this splat size.
9976 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9977
9978 // vsplti + shl self.
9979 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9980 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9981 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9982 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9983 Intrinsic::ppc_altivec_vslw
9984 };
9985 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9986 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9987 }
9988
9989 // vsplti + srl self.
9990 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9991 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9992 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9993 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9994 Intrinsic::ppc_altivec_vsrw
9995 };
9996 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9997 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9998 }
9999
10000 // vsplti + rol self.
10001 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
10002 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
10003 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
10004 static const unsigned IIDs[] = { // Intrinsic to use for each size.
10005 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
10006 Intrinsic::ppc_altivec_vrlw
10007 };
10008 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10009 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10010 }
10011
10012 // t = vsplti c, result = vsldoi t, t, 1
10013 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
10014 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10015 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
10016 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10017 }
10018 // t = vsplti c, result = vsldoi t, t, 2
10019 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
10020 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10021 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
10022 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10023 }
10024 // t = vsplti c, result = vsldoi t, t, 3
10025 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
10026 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10027 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
10028 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10029 }
10030 }
10031
10032 return SDValue();
10033}
10034
10035/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
10036/// the specified operations to build the shuffle.
10037static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
10038 SDValue RHS, SelectionDAG &DAG,
10039 const SDLoc &dl) {
10040 unsigned OpNum = (PFEntry >> 26) & 0x0F;
10041 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
10042 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
10043
10044 enum {
10045 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
10046 OP_VMRGHW,
10047 OP_VMRGLW,
10048 OP_VSPLTISW0,
10049 OP_VSPLTISW1,
10050 OP_VSPLTISW2,
10051 OP_VSPLTISW3,
10052 OP_VSLDOI4,
10053 OP_VSLDOI8,
10054 OP_VSLDOI12
10055 };
10056
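  // The 13-bit LHSID/RHSID values encode four element selectors in base 9
  // (digits 0-7 pick an element of the concatenated inputs, 8 means undef).
  // For example, (1*9+2)*9+3 decodes to <0,1,2,3>, an unchanged copy of LHS,
  // and ((4*9+5)*9+6)*9+7 decodes to <4,5,6,7>, an unchanged copy of RHS.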
10057 if (OpNum == OP_COPY) {
10058 if (LHSID == (1*9+2)*9+3) return LHS;
10059 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
10060 return RHS;
10061 }
10062
10063 SDValue OpLHS, OpRHS;
10064 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
10065 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
10066
10067 int ShufIdxs[16];
10068 switch (OpNum) {
10069 default: llvm_unreachable("Unknown i32 permute!");
10070 case OP_VMRGHW:
10071 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
10072 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
10073 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
10074 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
10075 break;
10076 case OP_VMRGLW:
10077 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
10078 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
10079 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
10080 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
10081 break;
10082 case OP_VSPLTISW0:
10083 for (unsigned i = 0; i != 16; ++i)
10084 ShufIdxs[i] = (i&3)+0;
10085 break;
10086 case OP_VSPLTISW1:
10087 for (unsigned i = 0; i != 16; ++i)
10088 ShufIdxs[i] = (i&3)+4;
10089 break;
10090 case OP_VSPLTISW2:
10091 for (unsigned i = 0; i != 16; ++i)
10092 ShufIdxs[i] = (i&3)+8;
10093 break;
10094 case OP_VSPLTISW3:
10095 for (unsigned i = 0; i != 16; ++i)
10096 ShufIdxs[i] = (i&3)+12;
10097 break;
10098 case OP_VSLDOI4:
10099 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
10100 case OP_VSLDOI8:
10101 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
10102 case OP_VSLDOI12:
10103 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
10104 }
10105 EVT VT = OpLHS.getValueType();
10106 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
10107 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
10108 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
10109 return DAG.getNode(ISD::BITCAST, dl, VT, T);
10110}
10111
10112/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10113/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10114/// SDValue.
10115SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10116 SelectionDAG &DAG) const {
10117 const unsigned BytesInVector = 16;
10118 bool IsLE = Subtarget.isLittleEndian();
10119 SDLoc dl(N);
10120 SDValue V1 = N->getOperand(0);
10121 SDValue V2 = N->getOperand(1);
10122 unsigned ShiftElts = 0, InsertAtByte = 0;
10123 bool Swap = false;
10124
10125 // Shifts required to get the byte we want at element 7.
10126 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10127 0, 15, 14, 13, 12, 11, 10, 9};
10128 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10129 1, 2, 3, 4, 5, 6, 7, 8};
10130
10131 ArrayRef<int> Mask = N->getMask();
10132 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10133
10134 // For each mask element, find out if we're just inserting something
10135 // from V2 into V1 or vice versa.
10136 // Possible permutations inserting an element from V2 into V1:
10137 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10138 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10139 // ...
10140 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10141 // Inserting from V1 into V2 will be similar, except mask range will be
10142 // [16,31].
10143
10144 bool FoundCandidate = false;
10145 // If both vector operands for the shuffle are the same vector, the mask
10146 // will contain only elements from the first one and the second one will be
10147 // undef.
10148 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10149   // Go through the mask of bytes to find an element that's being moved
10150 // from one vector to the other.
10151 for (unsigned i = 0; i < BytesInVector; ++i) {
10152 unsigned CurrentElement = Mask[i];
10153 // If 2nd operand is undefined, we should only look for element 7 in the
10154 // Mask.
10155 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10156 continue;
10157
10158 bool OtherElementsInOrder = true;
10159 // Examine the other elements in the Mask to see if they're in original
10160 // order.
10161 for (unsigned j = 0; j < BytesInVector; ++j) {
10162 if (j == i)
10163 continue;
10164       // If CurrentElement is from V1 [0,15], we expect the rest of the Mask
10165       // to be from V2 [16,31] and vice versa, unless the 2nd operand is
10166       // undefined, in which case we assume we're picking from the 1st operand.
10167 int MaskOffset =
10168 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10169 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10170 OtherElementsInOrder = false;
10171 break;
10172 }
10173 }
10174 // If other elements are in original order, we record the number of shifts
10175 // we need to get the element we want into element 7. Also record which byte
10176 // in the vector we should insert into.
10177 if (OtherElementsInOrder) {
10178 // If 2nd operand is undefined, we assume no shifts and no swapping.
10179 if (V2.isUndef()) {
10180 ShiftElts = 0;
10181 Swap = false;
10182 } else {
10183 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10184 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10185 : BigEndianShifts[CurrentElement & 0xF];
10186 Swap = CurrentElement < BytesInVector;
10187 }
10188 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10189 FoundCandidate = true;
10190 break;
10191 }
10192 }
10193
10194 if (!FoundCandidate)
10195 return SDValue();
10196
10197 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10198 // optionally with VECSHL if shift is required.
10199 if (Swap)
10200 std::swap(V1, V2);
10201 if (V2.isUndef())
10202 V2 = V1;
10203 if (ShiftElts) {
10204 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10205 DAG.getConstant(ShiftElts, dl, MVT::i32));
10206 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10207 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10208 }
10209 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10210 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10211}
10212
10213/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10214/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10215/// SDValue.
10216SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10217 SelectionDAG &DAG) const {
10218 const unsigned NumHalfWords = 8;
10219 const unsigned BytesInVector = NumHalfWords * 2;
10220 // Check that the shuffle is on half-words.
10221 if (!isNByteElemShuffleMask(N, 2, 1))
10222 return SDValue();
10223
10224 bool IsLE = Subtarget.isLittleEndian();
10225 SDLoc dl(N);
10226 SDValue V1 = N->getOperand(0);
10227 SDValue V2 = N->getOperand(1);
10228 unsigned ShiftElts = 0, InsertAtByte = 0;
10229 bool Swap = false;
10230
10231 // Shifts required to get the half-word we want at element 3.
10232 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10233 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10234
10235 uint32_t Mask = 0;
10236 uint32_t OriginalOrderLow = 0x1234567;
10237 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10238 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10239 // 32-bit space, only need 4-bit nibbles per element.
10240 for (unsigned i = 0; i < NumHalfWords; ++i) {
10241 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10242 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10243 }
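  // For illustration: the identity half-word mask <0,1,2,3,4,5,6,7> packs to
  // 0x01234567 here, matching OriginalOrderLow above.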
10244
10245 // For each mask element, find out if we're just inserting something
10246 // from V2 into V1 or vice versa. Possible permutations inserting an element
10247 // from V2 into V1:
10248 // X, 1, 2, 3, 4, 5, 6, 7
10249 // 0, X, 2, 3, 4, 5, 6, 7
10250 // 0, 1, X, 3, 4, 5, 6, 7
10251 // 0, 1, 2, X, 4, 5, 6, 7
10252 // 0, 1, 2, 3, X, 5, 6, 7
10253 // 0, 1, 2, 3, 4, X, 6, 7
10254 // 0, 1, 2, 3, 4, 5, X, 7
10255 // 0, 1, 2, 3, 4, 5, 6, X
10256 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10257
10258 bool FoundCandidate = false;
10259 // Go through the mask of half-words to find an element that's being moved
10260 // from one vector to the other.
10261 for (unsigned i = 0; i < NumHalfWords; ++i) {
10262 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10263 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10264 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10265 uint32_t TargetOrder = 0x0;
10266
10267 // If both vector operands for the shuffle are the same vector, the mask
10268 // will contain only elements from the first one and the second one will be
10269 // undef.
10270 if (V2.isUndef()) {
10271 ShiftElts = 0;
10272 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10273 TargetOrder = OriginalOrderLow;
10274 Swap = false;
10275 // Skip if not the correct element or mask of other elements don't equal
10276 // to our expected order.
10277 if (MaskOneElt == VINSERTHSrcElem &&
10278 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10279 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10280 FoundCandidate = true;
10281 break;
10282 }
10283 } else { // If both operands are defined.
10284 // Target order is [8,15] if the current mask is between [0,7].
10285 TargetOrder =
10286 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10287 // Skip if mask of other elements don't equal our expected order.
10288 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10289 // We only need the last 3 bits for the number of shifts.
10290 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10291 : BigEndianShifts[MaskOneElt & 0x7];
10292 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10293 Swap = MaskOneElt < NumHalfWords;
10294 FoundCandidate = true;
10295 break;
10296 }
10297 }
10298 }
10299
10300 if (!FoundCandidate)
10301 return SDValue();
10302
10303 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10304 // optionally with VECSHL if shift is required.
10305 if (Swap)
10306 std::swap(V1, V2);
10307 if (V2.isUndef())
10308 V2 = V1;
10309 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10310 if (ShiftElts) {
10311 // Double ShiftElts because we're left shifting on v16i8 type.
10312 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10313 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10314 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10315 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10316 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10317 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10318 }
10319 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10320 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10321 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10322 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10323}
10324
10325/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10326/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10327/// return the default SDValue.
10328SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10329 SelectionDAG &DAG) const {
10330 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10331 // to v16i8. Peek through the bitcasts to get the actual operands.
10332   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10333   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10334
10335 auto ShuffleMask = SVN->getMask();
10336 SDValue VecShuffle(SVN, 0);
10337 SDLoc DL(SVN);
10338
10339 // Check that we have a four byte shuffle.
10340 if (!isNByteElemShuffleMask(SVN, 4, 1))
10341 return SDValue();
10342
10343 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10344 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10345 std::swap(LHS, RHS);
10346     VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10347     ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10348 if (!CommutedSV)
10349 return SDValue();
10350 ShuffleMask = CommutedSV->getMask();
10351 }
10352
10353 // Ensure that the RHS is a vector of constants.
10354 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10355 if (!BVN)
10356 return SDValue();
10357
10358 // Check if RHS is a splat of 4-bytes (or smaller).
10359 APInt APSplatValue, APSplatUndef;
10360 unsigned SplatBitSize;
10361 bool HasAnyUndefs;
10362 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10363 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10364 SplatBitSize > 32)
10365 return SDValue();
10366
10367 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10368 // The instruction splats a constant C into two words of the source vector
10369 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10370 // Thus we check that the shuffle mask is the equivalent of
10371 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10372 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10373 // within each word are consecutive, so we only need to check the first byte.
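  // For illustration: the byte mask <0,1,2,3, 16,17,18,19, 8,9,10,11,
  // 20,21,22,23> keeps words 0 and 2 of LHS and takes words 1 and 3 from the
  // constant RHS, matching the first case below.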
10374 SDValue Index;
10375 bool IsLE = Subtarget.isLittleEndian();
10376 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10377 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10378 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10379 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10380 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10381 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10382 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10383 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10384 else
10385 return SDValue();
10386
10387 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10388 // for XXSPLTI32DX.
10389 unsigned SplatVal = APSplatValue.getZExtValue();
10390 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10391 SplatVal |= (SplatVal << SplatBitSize);
10392
10393 SDValue SplatNode = DAG.getNode(
10394 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10395 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10396 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10397}
10398
10399/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10400/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10401/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10402/// i.e (or (shl x, C1), (srl x, 128-C1)).
10403SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10404 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10405 assert(Op.getValueType() == MVT::v1i128 &&
10406 "Only set v1i128 as custom, other type shouldn't reach here!");
10407 SDLoc dl(Op);
10408 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10409 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10410 unsigned SHLAmt = N1.getConstantOperandVal(0);
10411 if (SHLAmt % 8 == 0) {
10412 std::array<int, 16> Mask;
10413 std::iota(Mask.begin(), Mask.end(), 0);
10414 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10415 if (SDValue Shuffle =
10416 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10417 DAG.getUNDEF(MVT::v16i8), Mask))
10418 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10419 }
10420 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10421 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10422 DAG.getConstant(SHLAmt, dl, MVT::i32));
10423 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10424 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10425 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10426 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10427}
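// For illustration of LowerROTL above: a v1i128 rotate-left by 24 becomes a
// byte-rotating v16i8 vector_shuffle, while a rotate by 3 falls back to the
// scalar (or (shl x, 3), (srl x, 125)) sequence.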
10428
10429/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10430/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10431/// return the code it can be lowered into. Worst case, it can always be
10432/// lowered into a vperm.
10433SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10434 SelectionDAG &DAG) const {
10435 SDLoc dl(Op);
10436 SDValue V1 = Op.getOperand(0);
10437 SDValue V2 = Op.getOperand(1);
10438 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10439
10440 // Any nodes that were combined in the target-independent combiner prior
10441 // to vector legalization will not be sent to the target combine. Try to
10442 // combine it here.
10443 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10444 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10445 return NewShuffle;
10446 Op = NewShuffle;
10447 SVOp = cast<ShuffleVectorSDNode>(Op);
10448 V1 = Op.getOperand(0);
10449 V2 = Op.getOperand(1);
10450 }
10451 EVT VT = Op.getValueType();
10452 bool isLittleEndian = Subtarget.isLittleEndian();
10453
10454 unsigned ShiftElts, InsertAtByte;
10455 bool Swap = false;
10456
10457 // If this is a load-and-splat, we can do that with a single instruction
10458 // in some cases. However if the load has multiple uses, we don't want to
10459 // combine it because that will just produce multiple loads.
10460 bool IsPermutedLoad = false;
10461 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10462 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10463 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10464 InputLoad->hasOneUse()) {
10465 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10466 int SplatIdx =
10467 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10468
10469 // The splat index for permuted loads will be in the left half of the vector
10470 // which is strictly wider than the loaded value by 8 bytes. So we need to
10471 // adjust the splat index to point to the correct address in memory.
10472 if (IsPermutedLoad) {
10473 assert((isLittleEndian || IsFourByte) &&
10474 "Unexpected size for permuted load on big endian target");
10475 SplatIdx += IsFourByte ? 2 : 1;
10476 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10477 "Splat of a value outside of the loaded memory");
10478 }
10479
10480 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10481 // For 4-byte load-and-splat, we need Power9.
10482 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10483 uint64_t Offset = 0;
10484 if (IsFourByte)
10485 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10486 else
10487 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10488
10489 // If the width of the load is the same as the width of the splat,
10490 // loading with an offset would load the wrong memory.
10491 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10492 Offset = 0;
10493
10494 SDValue BasePtr = LD->getBasePtr();
10495 if (Offset != 0)
10496         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10497                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
10498 SDValue Ops[] = {
10499 LD->getChain(), // Chain
10500 BasePtr, // BasePtr
10501 DAG.getValueType(Op.getValueType()) // VT
10502 };
10503 SDVTList VTL =
10504 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10505 SDValue LdSplt =
10506           DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10507                                   Ops, LD->getMemoryVT(), LD->getMemOperand());
10508 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10509 if (LdSplt.getValueType() != SVOp->getValueType(0))
10510 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10511 return LdSplt;
10512 }
10513 }
10514
10515 // All v2i64 and v2f64 shuffles are legal
10516 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10517 return Op;
10518
10519 if (Subtarget.hasP9Vector() &&
10520 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10521 isLittleEndian)) {
10522 if (V2.isUndef())
10523 V2 = V1;
10524 else if (Swap)
10525 std::swap(V1, V2);
10526 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10527 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10528 if (ShiftElts) {
10529 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10530 DAG.getConstant(ShiftElts, dl, MVT::i32));
10531 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10532 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10533 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10534 }
10535 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10536 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10537 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10538 }
10539
10540 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10541 SDValue SplatInsertNode;
10542 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10543 return SplatInsertNode;
10544 }
10545
10546 if (Subtarget.hasP9Altivec()) {
10547 SDValue NewISDNode;
10548 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10549 return NewISDNode;
10550
10551 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10552 return NewISDNode;
10553 }
10554
10555 if (Subtarget.hasVSX() &&
10556 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10557 if (Swap)
10558 std::swap(V1, V2);
10559 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10560 SDValue Conv2 =
10561 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10562
10563 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10564 DAG.getConstant(ShiftElts, dl, MVT::i32));
10565 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10566 }
10567
10568 if (Subtarget.hasVSX() &&
10569 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10570 if (Swap)
10571 std::swap(V1, V2);
10572 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10573 SDValue Conv2 =
10574 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10575
10576 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10577 DAG.getConstant(ShiftElts, dl, MVT::i32));
10578 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10579 }
10580
10581 if (Subtarget.hasP9Vector()) {
10582 if (PPC::isXXBRHShuffleMask(SVOp)) {
10583 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10584 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10585 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10586 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10587 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10588 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10589 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10590 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10591 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10592 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10593 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10594 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10595 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10596 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10597 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10598 }
10599 }
10600
10601 if (Subtarget.hasVSX()) {
10602 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10603 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10604
10605 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10606 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10607 DAG.getConstant(SplatIdx, dl, MVT::i32));
10608 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10609 }
10610
10611 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10612 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10613 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10614 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10615 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10616 }
10617 }
10618
10619 // Cases that are handled by instructions that take permute immediates
10620 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10621 // selected by the instruction selector.
10622 if (V2.isUndef()) {
10623 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10624 PPC::isSplatShuffleMask(SVOp, 2) ||
10625 PPC::isSplatShuffleMask(SVOp, 4) ||
10626 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10627 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10628 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10629 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10630 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10631 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10632 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10633 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10634 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10635 (Subtarget.hasP8Altivec() && (
10636 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10637 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10638 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10639 return Op;
10640 }
10641 }
10642
10643 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10644 // and produce a fixed permutation. If any of these match, do not lower to
10645 // VPERM.
10646 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10647 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10648 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10649 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10650 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10651 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10652 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10653 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10654 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10655 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10656 (Subtarget.hasP8Altivec() && (
10657 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10658 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10659 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10660 return Op;
10661
10662 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10663 // perfect shuffle table to emit an optimal matching sequence.
10664 ArrayRef<int> PermMask = SVOp->getMask();
10665
10666 if (!DisablePerfectShuffle && !isLittleEndian) {
10667 unsigned PFIndexes[4];
10668 bool isFourElementShuffle = true;
10669 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10670 ++i) { // Element number
10671 unsigned EltNo = 8; // Start out undef.
10672 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10673 if (PermMask[i * 4 + j] < 0)
10674 continue; // Undef, ignore it.
10675
10676 unsigned ByteSource = PermMask[i * 4 + j];
10677 if ((ByteSource & 3) != j) {
10678 isFourElementShuffle = false;
10679 break;
10680 }
10681
10682 if (EltNo == 8) {
10683 EltNo = ByteSource / 4;
10684 } else if (EltNo != ByteSource / 4) {
10685 isFourElementShuffle = false;
10686 break;
10687 }
10688 }
10689 PFIndexes[i] = EltNo;
10690 }
10691
10692 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10693 // perfect shuffle vector to determine if it is cost effective to do this as
10694 // discrete instructions, or whether we should use a vperm.
10695 // For now, we skip this for little endian until such time as we have a
10696 // little-endian perfect shuffle table.
10697 if (isFourElementShuffle) {
10698 // Compute the index in the perfect shuffle table.
10699 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10700 PFIndexes[2] * 9 + PFIndexes[3];
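      // For illustration: PFIndexes <0,1,2,3> (an identity shuffle taken
      // entirely from the first operand) gives
      // PFTableIndex 0*729 + 1*81 + 2*9 + 3 == 102.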
10701
10702 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10703 unsigned Cost = (PFEntry >> 30);
10704
10705 // Determining when to avoid vperm is tricky. Many things affect the cost
10706 // of vperm, particularly how many times the perm mask needs to be
10707 // computed. For example, if the perm mask can be hoisted out of a loop or
10708 // is already used (perhaps because there are multiple permutes with the
10709 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10710 // permute mask out of the loop requires an extra register.
10711 //
10712 // As a compromise, we only emit discrete instructions if the shuffle can
10713 // be generated in 3 or fewer operations. When we have loop information
10714 // available, if this block is within a loop, we should avoid using vperm
10715 // for 3-operation perms and use a constant pool load instead.
10716 if (Cost < 3)
10717 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10718 }
10719 }
10720
10721 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10722 // vector that will get spilled to the constant pool.
10723 if (V2.isUndef()) V2 = V1;
10724
10725 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10726}
10727
10728SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10729 ArrayRef<int> PermMask, EVT VT,
10730 SDValue V1, SDValue V2) const {
10731 unsigned Opcode = PPCISD::VPERM;
10732 EVT ValType = V1.getValueType();
10733 SDLoc dl(Op);
10734 bool NeedSwap = false;
10735 bool isLittleEndian = Subtarget.isLittleEndian();
10736 bool isPPC64 = Subtarget.isPPC64();
10737
10738 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10739 (V1->hasOneUse() || V2->hasOneUse())) {
10740 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10741 "XXPERM instead\n");
10742 Opcode = PPCISD::XXPERM;
10743
10744 // The second input to XXPERM is also an output so if the second input has
10745     // multiple uses then copying is necessary; as a result we want the
10746 // single-use operand to be used as the second input to prevent copying.
10747 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10748 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10749 std::swap(V1, V2);
10750 NeedSwap = !NeedSwap;
10751 }
10752 }
10753
10754 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10755 // that it is in input element units, not in bytes. Convert now.
10756
10757 // For little endian, the order of the input vectors is reversed, and
10758 // the permutation mask is complemented with respect to 31. This is
10759 // necessary to produce proper semantics with the big-endian-based vperm
10760 // instruction.
10761 EVT EltVT = V1.getValueType().getVectorElementType();
10762 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10763
10764 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10765 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10766
10767 /*
10768 Vectors will be appended like so: [ V1 | v2 ]
10769 XXSWAPD on V1:
10770 [ A | B | C | D ] -> [ C | D | A | B ]
10771 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10772 i.e. index of A, B += 8, and index of C, D -= 8.
10773 XXSWAPD on V2:
10774 [ E | F | G | H ] -> [ G | H | E | F ]
10775 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10776 i.e. index of E, F += 8, index of G, H -= 8
10777 Swap V1 and V2:
10778 [ V1 | V2 ] -> [ V2 | V1 ]
10779 0-15 16-31 0-15 16-31
10780 i.e. index of V1 += 16, index of V2 -= 16
10781 */
10782
10783 SmallVector<SDValue, 16> ResultMask;
10784 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10785 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10786
10787 if (V1HasXXSWAPD) {
10788 if (SrcElt < 8)
10789 SrcElt += 8;
10790 else if (SrcElt < 16)
10791 SrcElt -= 8;
10792 }
10793 if (V2HasXXSWAPD) {
10794 if (SrcElt > 23)
10795 SrcElt -= 8;
10796 else if (SrcElt > 15)
10797 SrcElt += 8;
10798 }
10799 if (NeedSwap) {
10800 if (SrcElt < 16)
10801 SrcElt += 16;
10802 else
10803 SrcElt -= 16;
10804 }
10805 for (unsigned j = 0; j != BytesPerElement; ++j)
10806 if (isLittleEndian)
10807 ResultMask.push_back(
10808 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10809 else
10810 ResultMask.push_back(
10811 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10812 }
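  // For illustration: on little-endian targets, byte 0 of the conceptual
  // [V1|V2] concatenation becomes control byte 31, so an identity v16i8 mask
  // produces the control vector <31, 30, ..., 16> here.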
10813
10814 if (V1HasXXSWAPD) {
10815 dl = SDLoc(V1->getOperand(0));
10816 V1 = V1->getOperand(0)->getOperand(1);
10817 }
10818 if (V2HasXXSWAPD) {
10819 dl = SDLoc(V2->getOperand(0));
10820 V2 = V2->getOperand(0)->getOperand(1);
10821 }
10822
10823 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10824 if (ValType != MVT::v2f64)
10825 V1 = DAG.getBitcast(MVT::v2f64, V1);
10826 if (V2.getValueType() != MVT::v2f64)
10827 V2 = DAG.getBitcast(MVT::v2f64, V2);
10828 }
10829
10830 ShufflesHandledWithVPERM++;
10831 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10832 LLVM_DEBUG({
10833 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10834 if (Opcode == PPCISD::XXPERM) {
10835 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10836 } else {
10837 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10838 }
10839 SVOp->dump();
10840 dbgs() << "With the following permute control vector:\n";
10841 VPermMask.dump();
10842 });
10843
10844 if (Opcode == PPCISD::XXPERM)
10845 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10846
10847 // Only need to place items backwards in LE,
10848 // the mask was properly calculated.
10849 if (isLittleEndian)
10850 std::swap(V1, V2);
10851
10852 SDValue VPERMNode =
10853 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10854
10855 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10856 return VPERMNode;
10857}
10858
10859/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10860/// vector comparison. If it is, return true and fill in Opc/isDot with
10861/// information about the intrinsic.
10862static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10863 bool &isDot, const PPCSubtarget &Subtarget) {
10864 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10865 CompareOpc = -1;
10866 isDot = false;
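  // Note: CompareOpc is the extended opcode used to encode the corresponding
  // vcmp* instruction, and isDot marks the predicate (record) forms, whose
  // comparison result is also reflected in CR6.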
10867 switch (IntrinsicID) {
10868 default:
10869 return false;
10870 // Comparison predicates.
10871 case Intrinsic::ppc_altivec_vcmpbfp_p:
10872 CompareOpc = 966;
10873 isDot = true;
10874 break;
10875 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10876 CompareOpc = 198;
10877 isDot = true;
10878 break;
10879 case Intrinsic::ppc_altivec_vcmpequb_p:
10880 CompareOpc = 6;
10881 isDot = true;
10882 break;
10883 case Intrinsic::ppc_altivec_vcmpequh_p:
10884 CompareOpc = 70;
10885 isDot = true;
10886 break;
10887 case Intrinsic::ppc_altivec_vcmpequw_p:
10888 CompareOpc = 134;
10889 isDot = true;
10890 break;
10891 case Intrinsic::ppc_altivec_vcmpequd_p:
10892 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10893 CompareOpc = 199;
10894 isDot = true;
10895 } else
10896 return false;
10897 break;
10898 case Intrinsic::ppc_altivec_vcmpneb_p:
10899 case Intrinsic::ppc_altivec_vcmpneh_p:
10900 case Intrinsic::ppc_altivec_vcmpnew_p:
10901 case Intrinsic::ppc_altivec_vcmpnezb_p:
10902 case Intrinsic::ppc_altivec_vcmpnezh_p:
10903 case Intrinsic::ppc_altivec_vcmpnezw_p:
10904 if (Subtarget.hasP9Altivec()) {
10905 switch (IntrinsicID) {
10906 default:
10907 llvm_unreachable("Unknown comparison intrinsic.");
10908 case Intrinsic::ppc_altivec_vcmpneb_p:
10909 CompareOpc = 7;
10910 break;
10911 case Intrinsic::ppc_altivec_vcmpneh_p:
10912 CompareOpc = 71;
10913 break;
10914 case Intrinsic::ppc_altivec_vcmpnew_p:
10915 CompareOpc = 135;
10916 break;
10917 case Intrinsic::ppc_altivec_vcmpnezb_p:
10918 CompareOpc = 263;
10919 break;
10920 case Intrinsic::ppc_altivec_vcmpnezh_p:
10921 CompareOpc = 327;
10922 break;
10923 case Intrinsic::ppc_altivec_vcmpnezw_p:
10924 CompareOpc = 391;
10925 break;
10926 }
10927 isDot = true;
10928 } else
10929 return false;
10930 break;
10931 case Intrinsic::ppc_altivec_vcmpgefp_p:
10932 CompareOpc = 454;
10933 isDot = true;
10934 break;
10935 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10936 CompareOpc = 710;
10937 isDot = true;
10938 break;
10939 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10940 CompareOpc = 774;
10941 isDot = true;
10942 break;
10943 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10944 CompareOpc = 838;
10945 isDot = true;
10946 break;
10947 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10948 CompareOpc = 902;
10949 isDot = true;
10950 break;
10951 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10952 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10953 CompareOpc = 967;
10954 isDot = true;
10955 } else
10956 return false;
10957 break;
10958 case Intrinsic::ppc_altivec_vcmpgtub_p:
10959 CompareOpc = 518;
10960 isDot = true;
10961 break;
10962 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10963 CompareOpc = 582;
10964 isDot = true;
10965 break;
10966 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10967 CompareOpc = 646;
10968 isDot = true;
10969 break;
10970 case Intrinsic::ppc_altivec_vcmpgtud_p:
10971 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10972 CompareOpc = 711;
10973 isDot = true;
10974 } else
10975 return false;
10976 break;
10977
10978 case Intrinsic::ppc_altivec_vcmpequq:
10979 case Intrinsic::ppc_altivec_vcmpgtsq:
10980 case Intrinsic::ppc_altivec_vcmpgtuq:
10981 if (!Subtarget.isISA3_1())
10982 return false;
10983 switch (IntrinsicID) {
10984 default:
10985 llvm_unreachable("Unknown comparison intrinsic.");
10986 case Intrinsic::ppc_altivec_vcmpequq:
10987 CompareOpc = 455;
10988 break;
10989 case Intrinsic::ppc_altivec_vcmpgtsq:
10990 CompareOpc = 903;
10991 break;
10992 case Intrinsic::ppc_altivec_vcmpgtuq:
10993 CompareOpc = 647;
10994 break;
10995 }
10996 break;
10997
10998 // VSX predicate comparisons use the same infrastructure
10999 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
11000 case Intrinsic::ppc_vsx_xvcmpgedp_p:
11001 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11002 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11003 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11004 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11005 if (Subtarget.hasVSX()) {
11006 switch (IntrinsicID) {
11007 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
11008 CompareOpc = 99;
11009 break;
11010 case Intrinsic::ppc_vsx_xvcmpgedp_p:
11011 CompareOpc = 115;
11012 break;
11013 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11014 CompareOpc = 107;
11015 break;
11016 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11017 CompareOpc = 67;
11018 break;
11019 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11020 CompareOpc = 83;
11021 break;
11022 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11023 CompareOpc = 75;
11024 break;
11025 }
11026 isDot = true;
11027 } else
11028 return false;
11029 break;
11030
11031 // Normal Comparisons.
11032 case Intrinsic::ppc_altivec_vcmpbfp:
11033 CompareOpc = 966;
11034 break;
11035 case Intrinsic::ppc_altivec_vcmpeqfp:
11036 CompareOpc = 198;
11037 break;
11038 case Intrinsic::ppc_altivec_vcmpequb:
11039 CompareOpc = 6;
11040 break;
11041 case Intrinsic::ppc_altivec_vcmpequh:
11042 CompareOpc = 70;
11043 break;
11044 case Intrinsic::ppc_altivec_vcmpequw:
11045 CompareOpc = 134;
11046 break;
11047 case Intrinsic::ppc_altivec_vcmpequd:
11048 if (Subtarget.hasP8Altivec())
11049 CompareOpc = 199;
11050 else
11051 return false;
11052 break;
11053 case Intrinsic::ppc_altivec_vcmpneb:
11054 case Intrinsic::ppc_altivec_vcmpneh:
11055 case Intrinsic::ppc_altivec_vcmpnew:
11056 case Intrinsic::ppc_altivec_vcmpnezb:
11057 case Intrinsic::ppc_altivec_vcmpnezh:
11058 case Intrinsic::ppc_altivec_vcmpnezw:
11059 if (Subtarget.hasP9Altivec())
11060 switch (IntrinsicID) {
11061 default:
11062 llvm_unreachable("Unknown comparison intrinsic.");
11063 case Intrinsic::ppc_altivec_vcmpneb:
11064 CompareOpc = 7;
11065 break;
11066 case Intrinsic::ppc_altivec_vcmpneh:
11067 CompareOpc = 71;
11068 break;
11069 case Intrinsic::ppc_altivec_vcmpnew:
11070 CompareOpc = 135;
11071 break;
11072 case Intrinsic::ppc_altivec_vcmpnezb:
11073 CompareOpc = 263;
11074 break;
11075 case Intrinsic::ppc_altivec_vcmpnezh:
11076 CompareOpc = 327;
11077 break;
11078 case Intrinsic::ppc_altivec_vcmpnezw:
11079 CompareOpc = 391;
11080 break;
11081 }
11082 else
11083 return false;
11084 break;
11085 case Intrinsic::ppc_altivec_vcmpgefp:
11086 CompareOpc = 454;
11087 break;
11088 case Intrinsic::ppc_altivec_vcmpgtfp:
11089 CompareOpc = 710;
11090 break;
11091 case Intrinsic::ppc_altivec_vcmpgtsb:
11092 CompareOpc = 774;
11093 break;
11094 case Intrinsic::ppc_altivec_vcmpgtsh:
11095 CompareOpc = 838;
11096 break;
11097 case Intrinsic::ppc_altivec_vcmpgtsw:
11098 CompareOpc = 902;
11099 break;
11100 case Intrinsic::ppc_altivec_vcmpgtsd:
11101 if (Subtarget.hasP8Altivec())
11102 CompareOpc = 967;
11103 else
11104 return false;
11105 break;
11106 case Intrinsic::ppc_altivec_vcmpgtub:
11107 CompareOpc = 518;
11108 break;
11109 case Intrinsic::ppc_altivec_vcmpgtuh:
11110 CompareOpc = 582;
11111 break;
11112 case Intrinsic::ppc_altivec_vcmpgtuw:
11113 CompareOpc = 646;
11114 break;
11115 case Intrinsic::ppc_altivec_vcmpgtud:
11116 if (Subtarget.hasP8Altivec())
11117 CompareOpc = 711;
11118 else
11119 return false;
11120 break;
11121 case Intrinsic::ppc_altivec_vcmpequq_p:
11122 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11123 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11124 if (!Subtarget.isISA3_1())
11125 return false;
11126 switch (IntrinsicID) {
11127 default:
11128 llvm_unreachable("Unknown comparison intrinsic.");
11129 case Intrinsic::ppc_altivec_vcmpequq_p:
11130 CompareOpc = 455;
11131 break;
11132 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11133 CompareOpc = 903;
11134 break;
11135 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11136 CompareOpc = 647;
11137 break;
11138 }
11139 isDot = true;
11140 break;
11141 }
11142 return true;
11143}
11144
11145/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11146/// lower, do it, otherwise return null.
11147SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11148 SelectionDAG &DAG) const {
11149 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11150
11151 SDLoc dl(Op);
11152
11153 switch (IntrinsicID) {
11154 case Intrinsic::thread_pointer:
11155 // Reads the thread pointer register, used for __builtin_thread_pointer.
11156 if (Subtarget.isPPC64())
11157 return DAG.getRegister(PPC::X13, MVT::i64);
11158 return DAG.getRegister(PPC::R2, MVT::i32);
11159
11160 case Intrinsic::ppc_rldimi: {
11161 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11162 SDValue Src = Op.getOperand(1);
11163 APInt Mask = Op.getConstantOperandAPInt(4);
11164 if (Mask.isZero())
11165 return Op.getOperand(2);
11166 if (Mask.isAllOnes())
11167 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11168 uint64_t SH = Op.getConstantOperandVal(3);
11169 unsigned MB = 0, ME = 0;
11170 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11171 report_fatal_error("invalid rldimi mask!");
11172 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11173 if (ME < 63 - SH) {
11174 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11175 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11176 } else if (ME > 63 - SH) {
11177 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11178 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11179 }
11180 return SDValue(
11181 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11182 {Op.getOperand(2), Src,
11183 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11184 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11185 0);
11186 }
11187
11188 case Intrinsic::ppc_rlwimi: {
11189 APInt Mask = Op.getConstantOperandAPInt(4);
11190 if (Mask.isZero())
11191 return Op.getOperand(2);
11192 if (Mask.isAllOnes())
11193 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11194 Op.getOperand(3));
11195 unsigned MB = 0, ME = 0;
11196 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11197 report_fatal_error("invalid rlwimi mask!");
11198 return SDValue(DAG.getMachineNode(
11199 PPC::RLWIMI, dl, MVT::i32,
11200 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11201 DAG.getTargetConstant(MB, dl, MVT::i32),
11202 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11203 0);
11204 }
11205
11206 case Intrinsic::ppc_rlwnm: {
11207 if (Op.getConstantOperandVal(3) == 0)
11208 return DAG.getConstant(0, dl, MVT::i32);
11209 unsigned MB = 0, ME = 0;
11210 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11211 report_fatal_error("invalid rlwnm mask!");
11212 return SDValue(
11213 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11214 {Op.getOperand(1), Op.getOperand(2),
11215 DAG.getTargetConstant(MB, dl, MVT::i32),
11216 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11217 0);
11218 }
11219
11220 case Intrinsic::ppc_mma_disassemble_acc: {
11221 if (Subtarget.isISAFuture()) {
11222 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11223 SDValue WideVec =
11224 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11225 Op.getOperand(1)),
11226 0);
11227 SmallVector<SDValue, 4> RetOps;
11228 SDValue Value = SDValue(WideVec.getNode(), 0);
11229 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11230
11231 SDValue Extract;
11232 Extract = DAG.getNode(
11233 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11234 Subtarget.isLittleEndian() ? Value2 : Value,
11235 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11236 dl, getPointerTy(DAG.getDataLayout())));
11237 RetOps.push_back(Extract);
11238 Extract = DAG.getNode(
11239 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11240 Subtarget.isLittleEndian() ? Value2 : Value,
11241 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11242 dl, getPointerTy(DAG.getDataLayout())));
11243 RetOps.push_back(Extract);
11244 Extract = DAG.getNode(
11245 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11246 Subtarget.isLittleEndian() ? Value : Value2,
11247 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11248 dl, getPointerTy(DAG.getDataLayout())));
11249 RetOps.push_back(Extract);
11250 Extract = DAG.getNode(
11251 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11252 Subtarget.isLittleEndian() ? Value : Value2,
11253 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11254 dl, getPointerTy(DAG.getDataLayout())));
11255 RetOps.push_back(Extract);
11256 return DAG.getMergeValues(RetOps, dl);
11257 }
11258 [[fallthrough]];
11259 }
11260 case Intrinsic::ppc_vsx_disassemble_pair: {
11261 int NumVecs = 2;
11262 SDValue WideVec = Op.getOperand(1);
11263 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11264 NumVecs = 4;
11265 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11266 }
11267 SmallVector<SDValue, 4> RetOps;
11268 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11269 SDValue Extract = DAG.getNode(
11270 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11271 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11272 : VecNo,
11273 dl, getPointerTy(DAG.getDataLayout())));
11274 RetOps.push_back(Extract);
11275 }
11276 return DAG.getMergeValues(RetOps, dl);
11277 }
11278
11279 case Intrinsic::ppc_mma_build_dmr: {
11280 SmallVector<SDValue, 4> Pairs;
11281 SmallVector<SDValue, 8> Chains;
11282 for (int i = 1; i < 9; i += 2) {
11283 SDValue Hi = Op.getOperand(i);
11284 SDValue Lo = Op.getOperand(i + 1);
11285 if (Hi->getOpcode() == ISD::LOAD)
11286 Chains.push_back(Hi.getValue(1));
11287 if (Lo->getOpcode() == ISD::LOAD)
11288 Chains.push_back(Lo.getValue(1));
11289 Pairs.push_back(
11290 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11291 }
11292 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11293 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11294 return DAG.getMergeValues({Value, TF}, dl);
11295 }
11296
11297 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11298 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11299 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11300 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11301 "Specify P of 0 or 1 for lower or upper 512 bytes");
11302 unsigned HiLo = Idx->getSExtValue();
11303 unsigned Opcode;
11304 unsigned Subx;
11305 if (HiLo == 0) {
11306 Opcode = PPC::DMXXEXTFDMR512;
11307 Subx = PPC::sub_wacc_lo;
11308 } else {
11309 Opcode = PPC::DMXXEXTFDMR512_HI;
11310 Subx = PPC::sub_wacc_hi;
11311 }
11312 SDValue Subreg(
11313 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11314 Op.getOperand(1),
11315 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11316 0);
11317 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11318 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11319 }
11320
11321 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11322 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11323 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11324 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11325 "Specify a dmr row pair 0-3");
11326 unsigned IdxVal = Idx->getSExtValue();
11327 unsigned Subx;
11328 switch (IdxVal) {
11329 case 0:
11330 Subx = PPC::sub_dmrrowp0;
11331 break;
11332 case 1:
11333 Subx = PPC::sub_dmrrowp1;
11334 break;
11335 case 2:
11336 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11337 break;
11338 case 3:
11339 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11340 break;
11341 }
11342 SDValue Subreg(
11343 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11344 Op.getOperand(1),
11345 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11346 0);
11347 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11348 return SDValue(
11349 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11350 0);
11351 }
11352
11353 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11354 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11355 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11356 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11357 "Specify P of 0 or 1 for lower or upper 512 bytes");
11358 unsigned HiLo = Idx->getSExtValue();
11359 unsigned Opcode;
11360 unsigned Subx;
11361 if (HiLo == 0) {
11362 Opcode = PPC::DMXXINSTDMR512;
11363 Subx = PPC::sub_wacc_lo;
11364 } else {
11365 Opcode = PPC::DMXXINSTDMR512_HI;
11366 Subx = PPC::sub_wacc_hi;
11367 }
11368 SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
11369 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
11370 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11371 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11372 Op.getOperand(1), Wacc, SubReg),
11373 0);
11374 }
11375
11376 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11377 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11378 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11379 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11380 "Specify a dmr row pair 0-3");
11381 unsigned IdxVal = Idx->getSExtValue();
11382 unsigned Subx;
11383 switch (IdxVal) {
11384 case 0:
11385 Subx = PPC::sub_dmrrowp0;
11386 break;
11387 case 1:
11388 Subx = PPC::sub_dmrrowp1;
11389 break;
11390 case 2:
11391 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11392 break;
11393 case 3:
11394 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11395 break;
11396 }
11397 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11398 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11399 SDValue Ops[] = {Op.getOperand(2), P};
11400 SDValue DMRRowp = SDValue(
11401 DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
11402 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11403 Op.getOperand(1), DMRRowp, SubReg),
11404 0);
11405 }
11406
11407 case Intrinsic::ppc_mma_xxmfacc:
11408 case Intrinsic::ppc_mma_xxmtacc: {
11409 // Allow pre-isa-future subtargets to lower as normal.
11410 if (!Subtarget.isISAFuture())
11411 return SDValue();
11412 // The intrinsics for xxmtacc and xxmfacc take one argument of
11413 // type v512i1. For future CPUs the corresponding wacc instructions
11414 // dmxx[inst|extf]dmr512 are always generated for type v512i1, so there
11415 // is no need to produce xxm[t|f]acc.
11416 SDValue WideVec = Op.getOperand(1);
11417 DAG.ReplaceAllUsesWith(Op, WideVec);
11418 return SDValue();
11419 }
11420
11421 case Intrinsic::ppc_unpack_longdouble: {
11422 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11423 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11424 "Argument of long double unpack must be 0 or 1!");
11425 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11426 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11427 Idx->getValueType(0)));
11428 }
11429
11430 case Intrinsic::ppc_compare_exp_lt:
11431 case Intrinsic::ppc_compare_exp_gt:
11432 case Intrinsic::ppc_compare_exp_eq:
11433 case Intrinsic::ppc_compare_exp_uo: {
11434 unsigned Pred;
11435 switch (IntrinsicID) {
11436 case Intrinsic::ppc_compare_exp_lt:
11437 Pred = PPC::PRED_LT;
11438 break;
11439 case Intrinsic::ppc_compare_exp_gt:
11440 Pred = PPC::PRED_GT;
11441 break;
11442 case Intrinsic::ppc_compare_exp_eq:
11443 Pred = PPC::PRED_EQ;
11444 break;
11445 case Intrinsic::ppc_compare_exp_uo:
11446 Pred = PPC::PRED_UN;
11447 break;
11448 }
11449 return SDValue(
11450 DAG.getMachineNode(
11451 PPC::SELECT_CC_I4, dl, MVT::i32,
11452 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11453 Op.getOperand(1), Op.getOperand(2)),
11454 0),
11455 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11456 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11457 0);
11458 }
11459 case Intrinsic::ppc_test_data_class: {
11460 EVT OpVT = Op.getOperand(1).getValueType();
11461 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11462 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11463 : PPC::XSTSTDCSP);
11464 return SDValue(
11465 DAG.getMachineNode(
11466 PPC::SELECT_CC_I4, dl, MVT::i32,
11467 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11468 Op.getOperand(1)),
11469 0),
11470 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11471 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11472 0);
11473 }
11474 case Intrinsic::ppc_fnmsub: {
11475 EVT VT = Op.getOperand(1).getValueType();
11476 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11477 return DAG.getNode(
11478 ISD::FNEG, dl, VT,
11479 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11480 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11481 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11482 Op.getOperand(2), Op.getOperand(3));
11483 }
11484 case Intrinsic::ppc_convert_f128_to_ppcf128:
11485 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11486 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11487 ? RTLIB::CONVERT_PPCF128_F128
11488 : RTLIB::CONVERT_F128_PPCF128;
11489 MakeLibCallOptions CallOptions;
11490 std::pair<SDValue, SDValue> Result =
11491 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11492 dl, SDValue());
11493 return Result.first;
11494 }
11495 case Intrinsic::ppc_maxfe:
11496 case Intrinsic::ppc_maxfl:
11497 case Intrinsic::ppc_maxfs:
11498 case Intrinsic::ppc_minfe:
11499 case Intrinsic::ppc_minfl:
11500 case Intrinsic::ppc_minfs: {
11501 EVT VT = Op.getValueType();
11502 assert(
11503 all_of(Op->ops().drop_front(4),
11504 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11505 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11506 (void)VT;
11507 ISD::CondCode CC = ISD::SETGT;
11508 if (IntrinsicID == Intrinsic::ppc_minfe ||
11509 IntrinsicID == Intrinsic::ppc_minfl ||
11510 IntrinsicID == Intrinsic::ppc_minfs)
11511 CC = ISD::SETLT;
11512 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11513 SDValue Res = Op.getOperand(I);
11514 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11515 Res =
11516 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11517 }
11518 return Res;
11519 }
11520 }
11521
11522 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11523 // opcode number of the comparison.
11524 int CompareOpc;
11525 bool isDot;
11526 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11527 return SDValue(); // Don't custom lower most intrinsics.
11528
11529 // If this is a non-dot comparison, make the VCMP node and we are done.
11530 if (!isDot) {
11531 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11532 Op.getOperand(1), Op.getOperand(2),
11533 DAG.getConstant(CompareOpc, dl, MVT::i32));
11534 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11535 }
11536
11537 // Create the PPCISD altivec 'dot' comparison node.
11538 SDValue Ops[] = {
11539 Op.getOperand(2), // LHS
11540 Op.getOperand(3), // RHS
11541 DAG.getConstant(CompareOpc, dl, MVT::i32)
11542 };
11543 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11544 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11545
11546 // Unpack the result based on how the target uses it.
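// (The predicate immediate follows the __CR6_* encoding used by Clang's
// altivec.h: a dot-form compare sets CR6's LT bit when the relation holds for
// every element and its EQ bit when it holds for none, so 0/1 request the EQ
// bit or its inverse and 2/3 the LT bit or its inverse.)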
11547 unsigned BitNo; // Bit # of CR6.
11548 bool InvertBit; // Invert result?
11549 unsigned Bitx;
11550 unsigned SetOp;
11551 switch (Op.getConstantOperandVal(1)) {
11552 default: // Can't happen, don't crash on invalid number though.
11553 case 0: // Return the value of the EQ bit of CR6.
11554 BitNo = 0;
11555 InvertBit = false;
11556 Bitx = PPC::sub_eq;
11557 SetOp = PPCISD::SETBC;
11558 break;
11559 case 1: // Return the inverted value of the EQ bit of CR6.
11560 BitNo = 0;
11561 InvertBit = true;
11562 Bitx = PPC::sub_eq;
11563 SetOp = PPCISD::SETBCR;
11564 break;
11565 case 2: // Return the value of the LT bit of CR6.
11566 BitNo = 2;
11567 InvertBit = false;
11568 Bitx = PPC::sub_lt;
11569 SetOp = PPCISD::SETBC;
11570 break;
11571 case 3: // Return the inverted value of the LT bit of CR6.
11572 BitNo = 2;
11573 InvertBit = true;
11574 Bitx = PPC::sub_lt;
11575 SetOp = PPCISD::SETBCR;
11576 break;
11577 }
11578
11579 SDValue GlueOp = CompNode.getValue(1);
11580 if (Subtarget.isISA3_1()) {
11581 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11582 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11583 SDValue CRBit =
11584 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11585 CR6Reg, SubRegIdx, GlueOp),
11586 0);
11587 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11588 }
11589
11590 // Now that we have the comparison, emit a copy from the CR to a GPR.
11591 // This is flagged to the above dot comparison.
11592 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11593 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11594
11595 // Shift the bit into the low position.
11596 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11597 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11598 // Isolate the bit.
11599 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11600 DAG.getConstant(1, dl, MVT::i32));
11601
11602 // If we are supposed to, toggle the bit.
11603 if (InvertBit)
11604 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11605 DAG.getConstant(1, dl, MVT::i32));
11606 return Flags;
11607}
11608
11609SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11610 SelectionDAG &DAG) const {
11611 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain at
11612 // the beginning of the argument list.
11613 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11614 SDLoc DL(Op);
11615 switch (Op.getConstantOperandVal(ArgStart)) {
11616 case Intrinsic::ppc_cfence: {
11617 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11618 SDValue Val = Op.getOperand(ArgStart + 1);
11619 EVT Ty = Val.getValueType();
11620 if (Ty == MVT::i128) {
11621 // FIXME: Testing one of two paired registers is sufficient to guarantee
11622 // ordering?
11623 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11624 }
11625 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11626 return SDValue(
11627 DAG.getMachineNode(
11628 Opcode, DL, MVT::Other,
11629 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11630 Op.getOperand(0)),
11631 0);
11632 }
11633 case Intrinsic::ppc_mma_disassemble_dmr: {
11634 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11635 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11636 }
11637 default:
11638 break;
11639 }
11640 return SDValue();
11641}
11642
11643// Lower scalar BSWAP64 to xxbrd.
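// Roughly: splat the i64 into both doublewords of a VSX register, byte-swap
// each doubleword with xxbrd, then move the appropriate doubleword back to a
// GPR (element 1 on little-endian, element 0 on big-endian).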
11644SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11645 SDLoc dl(Op);
11646 if (!Subtarget.isPPC64())
11647 return Op;
11648 // MTVSRDD
11649 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11650 Op.getOperand(0));
11651 // XXBRD
11652 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11653 // MFVSRD
11654 int VectorIndex = 0;
11655 if (Subtarget.isLittleEndian())
11656 VectorIndex = 1;
11657 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11658 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11659 return Op;
11660}
11661
11662// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11663// compared to a value that is atomically loaded (atomic loads zero-extend).
11664SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11665 SelectionDAG &DAG) const {
11666 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11667 "Expecting an atomic compare-and-swap here.");
11668 SDLoc dl(Op);
11669 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11670 EVT MemVT = AtomicNode->getMemoryVT();
11671 if (MemVT.getSizeInBits() >= 32)
11672 return Op;
11673
11674 SDValue CmpOp = Op.getOperand(2);
11675 // If this is already correctly zero-extended, leave it alone.
11676 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11677 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11678 return Op;
11679
11680 // Clear the high bits of the compare operand.
11681 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11682 SDValue NewCmpOp =
11683 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11684 DAG.getConstant(MaskVal, dl, MVT::i32));
11685
11686 // Replace the existing compare operand with the properly zero-extended one.
11687 SmallVector<SDValue, 4> Ops;
11688 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11689 Ops.push_back(AtomicNode->getOperand(i));
11690 Ops[2] = NewCmpOp;
11691 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11692 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11693 auto NodeTy =
11694 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11695 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11696}
11697
11698SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11699 SelectionDAG &DAG) const {
11700 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11701 EVT MemVT = N->getMemoryVT();
11702 assert(MemVT.getSimpleVT() == MVT::i128 &&
11703 "Expect quadword atomic operations");
11704 SDLoc dl(N);
11705 unsigned Opc = N->getOpcode();
11706 switch (Opc) {
11707 case ISD::ATOMIC_LOAD: {
11708 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11709 // lowered to PPC instructions by the pattern-matching instruction selector.
11710 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11711 SmallVector<SDValue, 4> Ops{
11712 N->getOperand(0),
11713 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11714 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11715 Ops.push_back(N->getOperand(I));
11716 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11717 Ops, MemVT, N->getMemOperand());
11718 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11719 SDValue ValHi =
11720 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11721 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11722 DAG.getConstant(64, dl, MVT::i32));
11723 SDValue Val =
11724 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11725 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11726 {Val, LoadedVal.getValue(2)});
11727 }
11728 case ISD::ATOMIC_STORE: {
11729 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11730 // lowered to PPC instructions by the pattern-matching instruction selector.
11731 SDVTList Tys = DAG.getVTList(MVT::Other);
11732 SmallVector<SDValue, 4> Ops{
11733 N->getOperand(0),
11734 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11735 SDValue Val = N->getOperand(1);
11736 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11737 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11738 DAG.getConstant(64, dl, MVT::i32));
11739 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11740 Ops.push_back(ValLo);
11741 Ops.push_back(ValHi);
11742 Ops.push_back(N->getOperand(2));
11743 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11744 N->getMemOperand());
11745 }
11746 default:
11747 llvm_unreachable("Unexpected atomic opcode");
11748 }
11749}
11750
11751 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11752 SelectionDAG &DAG,
11753 const PPCSubtarget &Subtarget) {
11754 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11755
11756 enum DataClassMask {
11757 DC_NAN = 1 << 6,
11758 DC_NEG_INF = 1 << 4,
11759 DC_POS_INF = 1 << 5,
11760 DC_NEG_ZERO = 1 << 2,
11761 DC_POS_ZERO = 1 << 3,
11762 DC_NEG_SUBNORM = 1,
11763 DC_POS_SUBNORM = 1 << 1,
11764 };
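// These bits form the DCMX operand of the xststdc[sp|dp|qp] test instructions;
// e.g. DC_NAN | DC_POS_INF | DC_NEG_INF (0x70) tests for any NaN or infinity.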
11765
11766 EVT VT = Op.getValueType();
11767
11768 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11769 : VT == MVT::f64 ? PPC::XSTSTDCDP
11770 : PPC::XSTSTDCSP;
11771
11772 if (Mask == fcAllFlags)
11773 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11774 if (Mask == 0)
11775 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11776
11777 // Handle the cases where it is cheaper or necessary to test the inverted flags.
11778 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11779 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11780 return DAG.getNOT(Dl, Rev, MVT::i1);
11781 }
11782
11783 // Power doesn't support testing whether a value is 'normal'. Test the rest
11784 // first, and test if it's 'not not-normal' with expected sign.
11785 if (Mask & fcNormal) {
11786 SDValue Rev(DAG.getMachineNode(
11787 TestOp, Dl, MVT::i32,
11788 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11789 DC_NEG_ZERO | DC_POS_ZERO |
11790 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11791 Dl, MVT::i32),
11792 Op),
11793 0);
11794 // The sign is stored in CR bit 0, the result in CR bit 2.
11795 SDValue Sign(
11796 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11797 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11798 0);
11799 SDValue Normal(DAG.getNOT(
11800 Dl,
11801 SDValue(DAG.getMachineNode(
11802 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11803 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11804 0),
11805 MVT::i1));
11806 if (Mask & fcPosNormal)
11807 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11808 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11809 if (Mask == fcPosNormal || Mask == fcNegNormal)
11810 return Result;
11811
11812 return DAG.getNode(
11813 ISD::OR, Dl, MVT::i1,
11814 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11815 }
11816
11817 // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11818 // the rest first, and then test if it 'is NaN and is signaling/quiet'.
11819 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11820 bool IsQuiet = Mask & fcQNan;
11821 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11822
11823 // Quietness is determined by the most significant bit of the fraction field.
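// For f128 that is bit 15 of the most-significant 32-bit word (0x8000), for
// f64 bit 19 of the high word (0x80000), and for f32 bit 22 (0x400000), which
// is what the masks below encode.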
11824 uint64_t QuietMask = 0;
11825 SDValue HighWord;
11826 if (VT == MVT::f128) {
11827 HighWord = DAG.getNode(
11828 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11829 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11830 QuietMask = 0x8000;
11831 } else if (VT == MVT::f64) {
11832 if (Subtarget.isPPC64()) {
11833 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11834 DAG.getBitcast(MVT::i64, Op),
11835 DAG.getConstant(1, Dl, MVT::i32));
11836 } else {
11837 SDValue Vec = DAG.getBitcast(
11838 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11839 HighWord = DAG.getNode(
11840 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11841 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11842 }
11843 QuietMask = 0x80000;
11844 } else if (VT == MVT::f32) {
11845 HighWord = DAG.getBitcast(MVT::i32, Op);
11846 QuietMask = 0x400000;
11847 }
11848 SDValue NanRes = DAG.getSetCC(
11849 Dl, MVT::i1,
11850 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11851 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11852 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11853 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11854 if (Mask == fcQNan || Mask == fcSNan)
11855 return NanRes;
11856
11857 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11858 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11859 NanRes);
11860 }
11861
11862 unsigned NativeMask = 0;
11863 if ((Mask & fcNan) == fcNan)
11864 NativeMask |= DC_NAN;
11865 if (Mask & fcNegInf)
11866 NativeMask |= DC_NEG_INF;
11867 if (Mask & fcPosInf)
11868 NativeMask |= DC_POS_INF;
11869 if (Mask & fcNegZero)
11870 NativeMask |= DC_NEG_ZERO;
11871 if (Mask & fcPosZero)
11872 NativeMask |= DC_POS_ZERO;
11873 if (Mask & fcNegSubnormal)
11874 NativeMask |= DC_NEG_SUBNORM;
11875 if (Mask & fcPosSubnormal)
11876 NativeMask |= DC_POS_SUBNORM;
11877 return SDValue(
11878 DAG.getMachineNode(
11879 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11880 SDValue(DAG.getMachineNode(
11881 TestOp, Dl, MVT::i32,
11882 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11883 0),
11884 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11885 0);
11886}
11887
11888SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11889 SelectionDAG &DAG) const {
11890 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11891 SDValue LHS = Op.getOperand(0);
11892 uint64_t RHSC = Op.getConstantOperandVal(1);
11893 SDLoc Dl(Op);
11894 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11895 if (LHS.getValueType() == MVT::ppcf128) {
11896 // The higher part determines the value class.
11897 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11898 DAG.getConstant(1, Dl, MVT::i32));
11899 }
11900
11901 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11902}
11903
11904SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11905 SelectionDAG &DAG) const {
11906 SDLoc dl(Op);
11907
11908 MachineFunction &MF = DAG.getMachineFunction();
11909 SDValue Op0 = Op.getOperand(0);
11910 EVT ValVT = Op0.getValueType();
11911 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11912 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11913 int64_t IntVal = Op.getConstantOperandVal(0);
11914 if (IntVal >= -16 && IntVal <= 15)
11915 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11916 dl);
11917 }
11918
11919 ReuseLoadInfo RLI;
11920 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11921 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11922 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11923 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11924
11925 MachineMemOperand *MMO =
11926 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11927 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11928 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11929 SDValue Bits = DAG.getMemIntrinsicNode(
11930 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11931 MVT::i32, MMO);
11932 if (RLI.ResChain)
11933 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11934 return Bits.getValue(0);
11935 }
11936
11937 // Create a stack slot that is 16-byte aligned.
11938 MachineFrameInfo &MFI = MF.getFrameInfo();
11939 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11940 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11941 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11942
11943 SDValue Val = Op0;
11944 // P10 hardware store forwarding requires that a single store contains all
11945 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11946 // to avoid load hit store on P10 when running binaries compiled for older
11947 // processors by generating two mergeable scalar stores to forward with the
11948 // vector load.
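// Both 8-byte halves of the 16-byte slot are written with the shifted scalar
// (the shift places the value in the element-0 position for big-endian), so
// the 16-byte vector load below is covered entirely by a pair of adjacent
// 8-byte stores that P10 can merge and forward.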
11949 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11950 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11951 ValVT.getSizeInBits() <= 64) {
11952 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11953 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11954 SDValue ShiftBy = DAG.getConstant(
11955 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11956 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11957 SDValue Plus8 =
11958 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11959 SDValue Store2 =
11960 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11961 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11962 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11963 MachinePointerInfo());
11964 }
11965
11966 // Store the input value into Value#0 of the stack slot.
11967 SDValue Store =
11968 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11969 // Load it out.
11970 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11971}
11972
11973SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11974 SelectionDAG &DAG) const {
11975 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11976 "Should only be called for ISD::INSERT_VECTOR_ELT");
11977
11978 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11979
11980 EVT VT = Op.getValueType();
11981 SDLoc dl(Op);
11982 SDValue V1 = Op.getOperand(0);
11983 SDValue V2 = Op.getOperand(1);
11984
11985 if (VT == MVT::v2f64 && C)
11986 return Op;
11987
11988 if (Subtarget.hasP9Vector()) {
11989 // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11990 // because on P10, it allows this specific insert_vector_elt load pattern to
11991 // utilize the refactored load and store infrastructure in order to exploit
11992 // prefixed loads.
11993 // On targets with inexpensive direct moves (Power9 and up), a
11994 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11995 // load since a single precision load will involve conversion to double
11996 // precision on the load followed by another conversion to single precision.
11997 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11998 (isa<LoadSDNode>(V2))) {
11999 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12000 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12001 SDValue InsVecElt =
12002 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12003 BitcastLoad, Op.getOperand(2));
12004 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12005 }
12006 }
12007
12008 if (Subtarget.isISA3_1()) {
12009 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12010 return SDValue();
12011 // On P10, we have legal lowering for constant and variable indices for
12012 // all vectors.
12013 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12014 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12015 return Op;
12016 }
12017
12018 // Before P10, we have legal lowering for constant indices but not for
12019 // variable ones.
12020 if (!C)
12021 return SDValue();
12022
12023 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
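// For example, inserting into element 3 of a v8i16 gives
// InsertAtByte = 3 * 2 = 6 on big-endian, which becomes (16 - 2) - 6 = 8 on
// little-endian.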
12024 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12025 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12026 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12027 unsigned InsertAtElement = C->getZExtValue();
12028 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12029 if (Subtarget.isLittleEndian()) {
12030 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12031 }
12032 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12033 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12034 }
12035 return Op;
12036}
12037
12038SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12039 SelectionDAG &DAG) const {
12040 SDLoc dl(Op);
12041 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12042 SDValue LoadChain = LN->getChain();
12043 SDValue BasePtr = LN->getBasePtr();
12044 EVT VT = Op.getValueType();
12045 bool IsV1024i1 = VT == MVT::v1024i1;
12046 bool IsV2048i1 = VT == MVT::v2048i1;
12047
12048 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12049 // Dense Math dmr pair registers, respectively.
12050 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12051 (void)IsV2048i1;
12052 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12053 "Dense Math support required.");
12054 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12055
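// The dmr value is assembled from 32-byte lxvp loads: each pair of v256i1
// halves is combined into a 512-bit wacc half with DMXXINSTDMR512[_HI], and
// REG_SEQUENCE then composes the halves into a full dmr (and, for v2048i1,
// two dmrs into a dmr pair).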
12056 SmallVector<SDValue, 8> Loads;
12057 SmallVector<SDValue, 8> LoadChains;
12058
12059 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12060 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12061 MachineMemOperand *MMO = LN->getMemOperand();
12062 unsigned NumVecs = VT.getSizeInBits() / 256;
12063 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12064 MachineMemOperand *NewMMO =
12065 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12066 if (Idx > 0) {
12067 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12068 DAG.getConstant(32, dl, BasePtr.getValueType()));
12069 LoadOps[2] = BasePtr;
12070 }
12071 SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12072 DAG.getVTList(MVT::v256i1, MVT::Other),
12073 LoadOps, MVT::v256i1, NewMMO);
12074 LoadChains.push_back(Ld.getValue(1));
12075 Loads.push_back(Ld);
12076 }
12077
12078 if (Subtarget.isLittleEndian()) {
12079 std::reverse(Loads.begin(), Loads.end());
12080 std::reverse(LoadChains.begin(), LoadChains.end());
12081 }
12082
12083 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12084 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[0],
12085 Loads[1]),
12086 0);
12087 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12088 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12089 Loads[2], Loads[3]),
12090 0);
12091 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12092 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12093 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
12094
12095 SDValue Value =
12096 SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12097
12098 if (IsV1024i1) {
12099 return DAG.getMergeValues({Value, TF}, dl);
12100 }
12101
12102 // Handle Loads for V2048i1 which represents a dmr pair.
12103 SDValue DmrPValue;
12104 SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
12105 Loads[4], Loads[5]),
12106 0);
12107 SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12108 Loads[6], Loads[7]),
12109 0);
12110 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
12111 SDValue Dmr1Value = SDValue(
12112 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
12113
12114 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12115 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12116
12117 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12118 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12119
12120 DmrPValue = SDValue(
12121 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12122
12123 return DAG.getMergeValues({DmrPValue, TF}, dl);
12124}
12125
12126SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12127 const SDLoc &dl,
12128 SelectionDAG &DAG) const {
12129 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
12130 Pairs[1]),
12131 0);
12132 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12133 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12134 Pairs[2], Pairs[3]),
12135 0);
12136 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12137 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12138
12139 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12140 {RC, Lo, LoSub, Hi, HiSub}),
12141 0);
12142}
12143
12144SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12145 SelectionDAG &DAG) const {
12146 SDLoc dl(Op);
12147 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12148 SDValue LoadChain = LN->getChain();
12149 SDValue BasePtr = LN->getBasePtr();
12150 EVT VT = Op.getValueType();
12151
12152 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12153 return LowerDMFVectorLoad(Op, DAG);
12154
12155 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12156 return Op;
12157
12158 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12159 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
12160 // 2 or 4 vsx registers.
12161 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12162 "Type unsupported without MMA");
12163 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12164 "Type unsupported without paired vector support");
12165 Align Alignment = LN->getAlign();
12166 SmallVector<SDValue, 4> Loads;
12167 SmallVector<SDValue, 4> LoadChains;
12168 unsigned NumVecs = VT.getSizeInBits() / 128;
12169 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12170 SDValue Load =
12171 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12172 LN->getPointerInfo().getWithOffset(Idx * 16),
12173 commonAlignment(Alignment, Idx * 16),
12174 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12175 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12176 DAG.getConstant(16, dl, BasePtr.getValueType()));
12177 Loads.push_back(Load);
12178 LoadChains.push_back(Load.getValue(1));
12179 }
12180 if (Subtarget.isLittleEndian()) {
12181 std::reverse(Loads.begin(), Loads.end());
12182 std::reverse(LoadChains.begin(), LoadChains.end());
12183 }
12184 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12185 SDValue Value =
12186 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12187 dl, VT, Loads);
12188 SDValue RetOps[] = {Value, TF};
12189 return DAG.getMergeValues(RetOps, dl);
12190}
12191
12192SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12193 SelectionDAG &DAG) const {
12194
12195 SDLoc dl(Op);
12196 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12197 SDValue StoreChain = SN->getChain();
12198 SDValue BasePtr = SN->getBasePtr();
12199 SmallVector<SDValue, 4> Values;
12200 SmallVector<SDValue, 4> Stores;
12201 EVT VT = SN->getValue().getValueType();
12202 bool IsV1024i1 = VT == MVT::v1024i1;
12203 bool IsV2048i1 = VT == MVT::v2048i1;
12204
12205 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12206 // Dense Math dmr pair registers, respectively.
12207 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12208 (void)IsV2048i1;
12209 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12210 "Dense Math support required.");
12211 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12212
12213 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12214 if (IsV1024i1) {
12215 SDValue Lo(DAG.getMachineNode(
12216 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12217 Op.getOperand(1),
12218 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12219 0);
12220 SDValue Hi(DAG.getMachineNode(
12221 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12222 Op.getOperand(1),
12223 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12224 0);
12225 MachineSDNode *ExtNode =
12226 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12227 Values.push_back(SDValue(ExtNode, 0));
12228 Values.push_back(SDValue(ExtNode, 1));
12229 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12230 Values.push_back(SDValue(ExtNode, 0));
12231 Values.push_back(SDValue(ExtNode, 1));
12232 } else {
12233 // This corresponds to v2048i1 which represents a dmr pair.
12234 SDValue Dmr0(
12235 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12236 Op.getOperand(1),
12237 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12238 0);
12239
12240 SDValue Dmr1(
12241 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12242 Op.getOperand(1),
12243 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12244 0);
12245
12246 SDValue Dmr0Lo(DAG.getMachineNode(
12247 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12248 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12249 0);
12250
12251 SDValue Dmr0Hi(DAG.getMachineNode(
12252 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12253 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12254 0);
12255
12256 SDValue Dmr1Lo(DAG.getMachineNode(
12257 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12258 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12259 0);
12260
12261 SDValue Dmr1Hi(DAG.getMachineNode(
12262 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12263 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12264 0);
12265
12266 MachineSDNode *ExtNode =
12267 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12268 Values.push_back(SDValue(ExtNode, 0));
12269 Values.push_back(SDValue(ExtNode, 1));
12270 ExtNode =
12271 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12272 Values.push_back(SDValue(ExtNode, 0));
12273 Values.push_back(SDValue(ExtNode, 1));
12274 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12275 Values.push_back(SDValue(ExtNode, 0));
12276 Values.push_back(SDValue(ExtNode, 1));
12277 ExtNode =
12278 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12279 Values.push_back(SDValue(ExtNode, 0));
12280 Values.push_back(SDValue(ExtNode, 1));
12281 }
12282
12283 if (Subtarget.isLittleEndian())
12284 std::reverse(Values.begin(), Values.end());
12285
12286 SDVTList Tys = DAG.getVTList(MVT::Other);
12287 SmallVector<SDValue, 4> Ops{
12288 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12289 Values[0], BasePtr};
12290 MachineMemOperand *MMO = SN->getMemOperand();
12291 unsigned NumVecs = VT.getSizeInBits() / 256;
12292 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12293 MachineMemOperand *NewMMO =
12294 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12295 if (Idx > 0) {
12296 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12297 DAG.getConstant(32, dl, BasePtr.getValueType()));
12298 Ops[3] = BasePtr;
12299 }
12300 Ops[2] = Values[Idx];
12301 SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12302 MVT::v256i1, NewMMO);
12303 Stores.push_back(St);
12304 }
12305
12306 SDValue TF = DAG.getTokenFactor(dl, Stores);
12307 return TF;
12308}
12309
12310SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12311 SelectionDAG &DAG) const {
12312 SDLoc dl(Op);
12313 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12314 SDValue StoreChain = SN->getChain();
12315 SDValue BasePtr = SN->getBasePtr();
12316 SDValue Value = SN->getValue();
12317 SDValue Value2 = SN->getValue();
12318 EVT StoreVT = Value.getValueType();
12319
12320 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12321 return LowerDMFVectorStore(Op, DAG);
12322
12323 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12324 return Op;
12325
12326 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12327 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
12328 // underlying registers individually.
12329 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12330 "Type unsupported without MMA");
12331 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12332 "Type unsupported without paired vector support");
12333 Align Alignment = SN->getAlign();
12334 SmallVector<SDValue, 4> Stores;
12335 unsigned NumVecs = 2;
12336 if (StoreVT == MVT::v512i1) {
12337 if (Subtarget.isISAFuture()) {
12338 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12339 MachineSDNode *ExtNode = DAG.getMachineNode(
12340 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12341
12342 Value = SDValue(ExtNode, 0);
12343 Value2 = SDValue(ExtNode, 1);
12344 } else
12345 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12346 NumVecs = 4;
12347 }
12348 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12349 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12350 SDValue Elt;
12351 if (Subtarget.isISAFuture()) {
12352 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12353 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12354 Idx > 1 ? Value2 : Value,
12355 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12356 } else
12357 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12358 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12359
12360 SDValue Store =
12361 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12362 SN->getPointerInfo().getWithOffset(Idx * 16),
12363 commonAlignment(Alignment, Idx * 16),
12364 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12365 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12366 DAG.getConstant(16, dl, BasePtr.getValueType()));
12367 Stores.push_back(Store);
12368 }
12369 SDValue TF = DAG.getTokenFactor(dl, Stores);
12370 return TF;
12371}
12372
12373SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12374 SDLoc dl(Op);
12375 if (Op.getValueType() == MVT::v4i32) {
12376 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12377
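// Splitting each 32-bit lane as a = a_hi * 2^16 + a_lo (likewise b), the
// product modulo 2^32 is a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 16):
// vmulouh yields the a_lo*b_lo terms and vmsumuhm on the halfword-swapped RHS
// yields the cross terms, which are then shifted up and added.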
12378 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12379 // +16 as shift amt.
12380 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12381 SDValue RHSSwap = // = vrlw RHS, 16
12382 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12383
12384 // Shrinkify inputs to v8i16.
12385 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12386 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12387 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12388
12389 // Low parts multiplied together, generating 32-bit results (we ignore the
12390 // top parts).
12391 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12392 LHS, RHS, DAG, dl, MVT::v4i32);
12393
12394 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12395 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12396 // Shift the high parts up 16 bits.
12397 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12398 Neg16, DAG, dl);
12399 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12400 } else if (Op.getValueType() == MVT::v16i8) {
12401 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12402 bool isLittleEndian = Subtarget.isLittleEndian();
12403
12404 // Multiply the even 8-bit parts, producing 16-bit sums.
12405 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12406 LHS, RHS, DAG, dl, MVT::v8i16);
12407 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12408
12409 // Multiply the odd 8-bit parts, producing 16-bit sums.
12410 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12411 LHS, RHS, DAG, dl, MVT::v8i16);
12412 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12413
12414 // Merge the results together. Because vmuleub and vmuloub are
12415 // instructions with a big-endian bias, we must reverse the
12416 // element numbering and reverse the meaning of "odd" and "even"
12417 // when generating little endian code.
12418 int Ops[16];
12419 for (unsigned i = 0; i != 8; ++i) {
12420 if (isLittleEndian) {
12421 Ops[i*2 ] = 2*i;
12422 Ops[i*2+1] = 2*i+16;
12423 } else {
12424 Ops[i*2 ] = 2*i+1;
12425 Ops[i*2+1] = 2*i+1+16;
12426 }
12427 }
12428 if (isLittleEndian)
12429 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12430 else
12431 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12432 } else {
12433 llvm_unreachable("Unknown mul to lower!");
12434 }
12435}
12436
12437SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12438 bool IsStrict = Op->isStrictFPOpcode();
12439 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12440 !Subtarget.hasP9Vector())
12441 return SDValue();
12442
12443 return Op;
12444}
12445
12446 // Custom lowering for fpext v2f32 to v2f64
12447SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12448
12449 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12450 "Should only be called for ISD::FP_EXTEND");
12451
12452 // FIXME: handle extends from half precision float vectors on P9.
12453 // We only want to custom lower an extend from v2f32 to v2f64.
12454 if (Op.getValueType() != MVT::v2f64 ||
12455 Op.getOperand(0).getValueType() != MVT::v2f32)
12456 return SDValue();
12457
12458 SDLoc dl(Op);
12459 SDValue Op0 = Op.getOperand(0);
12460
12461 switch (Op0.getOpcode()) {
12462 default:
12463 return SDValue();
12464 case ISD::EXTRACT_SUBVECTOR: {
12465 assert(Op0.getNumOperands() == 2 &&
12466 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12467 "Node should have 2 operands with second one being a constant!");
12468
12469 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12470 return SDValue();
12471
12472 // Custom lower is only done for high or low doubleword.
12473 int Idx = Op0.getConstantOperandVal(1);
12474 if (Idx % 2 != 0)
12475 return SDValue();
12476
12477 // Since input is v4f32, at this point Idx is either 0 or 2.
12478 // Shift to get the doubleword position we want.
12479 int DWord = Idx >> 1;
12480
12481 // High and low word positions are different on little endian.
12482 if (Subtarget.isLittleEndian())
12483 DWord ^= 0x1;
12484
12485 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12486 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12487 }
12488 case ISD::FADD:
12489 case ISD::FMUL:
12490 case ISD::FSUB: {
12491 SDValue NewLoad[2];
12492 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12493 // Ensure both inputs are loads.
12494 SDValue LdOp = Op0.getOperand(i);
12495 if (LdOp.getOpcode() != ISD::LOAD)
12496 return SDValue();
12497 // Generate new load node.
12498 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12499 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12500 NewLoad[i] = DAG.getMemIntrinsicNode(
12501 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12502 LD->getMemoryVT(), LD->getMemOperand());
12503 }
12504 SDValue NewOp =
12505 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12506 NewLoad[1], Op0.getNode()->getFlags());
12507 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12508 DAG.getConstant(0, dl, MVT::i32));
12509 }
12510 case ISD::LOAD: {
12511 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12512 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12513 SDValue NewLd = DAG.getMemIntrinsicNode(
12514 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12515 LD->getMemoryVT(), LD->getMemOperand());
12516 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12517 DAG.getConstant(0, dl, MVT::i32));
12518 }
12519 }
12520  llvm_unreachable("ERROR: Should return for all cases within switch.");
12521}
12522
12523static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12524                                            SelectionDAG &DAG,
12525 const PPCSubtarget &STI) {
12526 SDLoc DL(Value);
12527 if (STI.useCRBits())
12528 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12529 DAG.getConstant(1, DL, SumType),
12530 DAG.getConstant(0, DL, SumType));
12531 else
12532 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12533 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12534 Value, DAG.getAllOnesConstant(DL, SumType));
12535 return Sum.getValue(1);
12536}
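// Note on the helper above: a carry *flag* is materialized from a 0-or-1 carry
// *value* by adding the value to an all-ones constant with PPCISD::ADDC; the
// addition carries out exactly when the value is 1 (1 + 0xFF...F wraps to 0),
// so the second result of the ADDC is the desired carry flag.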
12537
12538static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12539                                            EVT CarryType, SelectionDAG &DAG,
12540 const PPCSubtarget &STI) {
12541 SDLoc DL(Flag);
12542 SDValue Zero = DAG.getConstant(0, DL, SumType);
12543 SDValue Carry = DAG.getNode(
12544 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12545 if (STI.useCRBits())
12546 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12547 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12548}
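// Note on the helper above: the carry flag is converted back into a value by
// computing 0 + 0 + carry with PPCISD::ADDE, whose sum is exactly the carry
// bit; that bit is then compared against zero (when CR bits are used) or
// zero-extended/truncated to the requested carry type.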
12549
12550SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12551
12552 SDLoc DL(Op);
12553 SDNode *N = Op.getNode();
12554 EVT VT = N->getValueType(0);
12555 EVT CarryType = N->getValueType(1);
12556 unsigned Opc = N->getOpcode();
12557 bool IsAdd = Opc == ISD::UADDO;
12558 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12559 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12560 N->getOperand(0), N->getOperand(1));
12561 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12562 DAG, Subtarget);
12563 if (!IsAdd)
12564 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12565 DAG.getConstant(1UL, DL, CarryType));
12566 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12567}
12568
12569SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12570 SelectionDAG &DAG) const {
12571 SDLoc DL(Op);
12572 SDNode *N = Op.getNode();
12573 unsigned Opc = N->getOpcode();
12574 EVT VT = N->getValueType(0);
12575 EVT CarryType = N->getValueType(1);
12576 SDValue CarryOp = N->getOperand(2);
12577 bool IsAdd = Opc == ISD::UADDO_CARRY;
12578 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12579 if (!IsAdd)
12580 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12581 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12582 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12583 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12584 Op.getOperand(0), Op.getOperand(1), CarryOp);
12585 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12586 Subtarget);
12587 if (!IsAdd)
12588 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12589 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12590 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12591}
12592
12593SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12594
12595 SDLoc dl(Op);
12596 SDValue LHS = Op.getOperand(0);
12597 SDValue RHS = Op.getOperand(1);
12598 EVT VT = Op.getNode()->getValueType(0);
12599
12600 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12601
12602 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12603 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12604
12605 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12606
12607 SDValue Overflow =
12608 DAG.getNode(ISD::SRL, dl, VT, And,
12609 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12610
12611 SDValue OverflowTrunc =
12612 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12613
12614 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12615}
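// Note: the expansion above is the standard bit trick for signed-subtraction
// overflow. For Sub = LHS - RHS, overflow requires the operands to have
// different signs (sign bit of LHS ^ RHS set) and the result's sign to differ
// from LHS (sign bit of Sub ^ LHS set), so
//   Overflow = ((LHS ^ RHS) & (Sub ^ LHS)) >> (BitWidth - 1).
// For example, with i8 operands LHS = 127 and RHS = -1, Sub wraps to -128 and
// both XORs have their sign bits set, so Overflow is 1.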
12616
12617/// LowerOperation - Provide custom lowering hooks for some operations.
12618///
12619SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12620  switch (Op.getOpcode()) {
12621 default:
12622 llvm_unreachable("Wasn't expecting to be able to lower this!");
12623 case ISD::FPOW: return lowerPow(Op, DAG);
12624 case ISD::FSIN: return lowerSin(Op, DAG);
12625 case ISD::FCOS: return lowerCos(Op, DAG);
12626 case ISD::FLOG: return lowerLog(Op, DAG);
12627 case ISD::FLOG10: return lowerLog10(Op, DAG);
12628 case ISD::FEXP: return lowerExp(Op, DAG);
12629 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12630 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12631 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12632 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12633 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12634 case ISD::STRICT_FSETCC:
12635  case ISD::STRICT_FSETCCS:
12636  case ISD::SETCC: return LowerSETCC(Op, DAG);
12637 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12638 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12639 case ISD::SSUBO:
12640 return LowerSSUBO(Op, DAG);
12641
12642 case ISD::INLINEASM:
12643 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12644 // Variable argument lowering.
12645 case ISD::VASTART: return LowerVASTART(Op, DAG);
12646 case ISD::VAARG: return LowerVAARG(Op, DAG);
12647 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12648
12649 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12650 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12651  case ISD::GET_DYNAMIC_AREA_OFFSET:
12652    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12653
12654 // Exception handling lowering.
12655 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12656 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12657 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12658
12659 case ISD::LOAD: return LowerLOAD(Op, DAG);
12660 case ISD::STORE: return LowerSTORE(Op, DAG);
12661 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12662 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12663  case ISD::STRICT_FP_TO_UINT:
12664  case ISD::STRICT_FP_TO_SINT:
12665  case ISD::FP_TO_UINT:
12666 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12667  case ISD::STRICT_UINT_TO_FP:
12668  case ISD::STRICT_SINT_TO_FP:
12669  case ISD::UINT_TO_FP:
12670 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12671 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12672 case ISD::SET_ROUNDING:
12673 return LowerSET_ROUNDING(Op, DAG);
12674
12675 // Lower 64-bit shifts.
12676 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12677 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12678 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12679
12680 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12681 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12682
12683 // Vector-related lowering.
12684 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12685 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12686 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12687 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12688 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12689 case ISD::MUL: return LowerMUL(Op, DAG);
12690 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12691  case ISD::STRICT_FP_ROUND:
12692  case ISD::FP_ROUND:
12693 return LowerFP_ROUND(Op, DAG);
12694 case ISD::ROTL: return LowerROTL(Op, DAG);
12695
12696 // For counter-based loop handling.
12697 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12698
12699 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12700
12701 // Frame & Return address.
12702 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12703 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12704
12705  case ISD::INTRINSIC_VOID:
12706    return LowerINTRINSIC_VOID(Op, DAG);
12707 case ISD::BSWAP:
12708 return LowerBSWAP(Op, DAG);
12709  case ISD::ATOMIC_CMP_SWAP:
12710    return LowerATOMIC_CMP_SWAP(Op, DAG);
12711 case ISD::ATOMIC_STORE:
12712 return LowerATOMIC_LOAD_STORE(Op, DAG);
12713 case ISD::IS_FPCLASS:
12714 return LowerIS_FPCLASS(Op, DAG);
12715 case ISD::UADDO:
12716 case ISD::USUBO:
12717 return LowerADDSUBO(Op, DAG);
12718 case ISD::UADDO_CARRY:
12719 case ISD::USUBO_CARRY:
12720 return LowerADDSUBO_CARRY(Op, DAG);
12721 }
12722}
12723
12724void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12725                                           SmallVectorImpl<SDValue> &Results,
12726                                           SelectionDAG &DAG) const {
12727 SDLoc dl(N);
12728 switch (N->getOpcode()) {
12729 default:
12730 llvm_unreachable("Do not know how to custom type legalize this operation!");
12731 case ISD::ATOMIC_LOAD: {
12732 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12733 Results.push_back(Res);
12734 Results.push_back(Res.getValue(1));
12735 break;
12736 }
12737 case ISD::READCYCLECOUNTER: {
12738 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12739 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12740
12741 Results.push_back(
12742 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12743 Results.push_back(RTB.getValue(2));
12744 break;
12745 }
12746  case ISD::INTRINSIC_W_CHAIN: {
12747    if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12748 break;
12749
12750 assert(N->getValueType(0) == MVT::i1 &&
12751 "Unexpected result type for CTR decrement intrinsic");
12752 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12753 N->getValueType(0));
12754 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12755 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12756 N->getOperand(1));
12757
12758 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12759 Results.push_back(NewInt.getValue(1));
12760 break;
12761 }
12762  case ISD::INTRINSIC_WO_CHAIN: {
12763    switch (N->getConstantOperandVal(0)) {
12764 case Intrinsic::ppc_pack_longdouble:
12765 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12766 N->getOperand(2), N->getOperand(1)));
12767 break;
12768 case Intrinsic::ppc_maxfe:
12769 case Intrinsic::ppc_minfe:
12770 case Intrinsic::ppc_fnmsub:
12771 case Intrinsic::ppc_convert_f128_to_ppcf128:
12772 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12773 break;
12774 }
12775 break;
12776 }
12777 case ISD::VAARG: {
12778 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12779 return;
12780
12781 EVT VT = N->getValueType(0);
12782
12783 if (VT == MVT::i64) {
12784 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12785
12786 Results.push_back(NewNode);
12787 Results.push_back(NewNode.getValue(1));
12788 }
12789 return;
12790 }
12791  case ISD::STRICT_FP_TO_SINT:
12792  case ISD::STRICT_FP_TO_UINT:
12793  case ISD::FP_TO_SINT:
12794 case ISD::FP_TO_UINT: {
12795 // LowerFP_TO_INT() can only handle f32 and f64.
12796 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12797 MVT::ppcf128)
12798 return;
12799 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12800 Results.push_back(LoweredValue);
12801 if (N->isStrictFPOpcode())
12802 Results.push_back(LoweredValue.getValue(1));
12803 return;
12804 }
12805 case ISD::TRUNCATE: {
12806 if (!N->getValueType(0).isVector())
12807 return;
12808 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12809 if (Lowered)
12810 Results.push_back(Lowered);
12811 return;
12812 }
12813 case ISD::SCALAR_TO_VECTOR: {
12814 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12815 if (Lowered)
12816 Results.push_back(Lowered);
12817 return;
12818 }
12819 case ISD::FSHL:
12820 case ISD::FSHR:
12821 // Don't handle funnel shifts here.
12822 return;
12823 case ISD::BITCAST:
12824 // Don't handle bitcast here.
12825 return;
12826 case ISD::FP_EXTEND:
12827 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12828 if (Lowered)
12829 Results.push_back(Lowered);
12830 return;
12831 }
12832}
12833
12834//===----------------------------------------------------------------------===//
12835// Other Lowering Code
12836//===----------------------------------------------------------------------===//
12837
12838static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12839  return Builder.CreateIntrinsic(Id, {});
12840}
12841
12842Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12843                                         Value *Addr,
12844 AtomicOrdering Ord) const {
12845 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12846
12847 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12848 "Only 8/16/32/64-bit atomic loads supported");
12849 Intrinsic::ID IntID;
12850 switch (SZ) {
12851 default:
12852 llvm_unreachable("Unexpected PrimitiveSize");
12853 case 8:
12854 IntID = Intrinsic::ppc_lbarx;
12855    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12856 break;
12857 case 16:
12858 IntID = Intrinsic::ppc_lharx;
12859    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12860 break;
12861 case 32:
12862 IntID = Intrinsic::ppc_lwarx;
12863 break;
12864 case 64:
12865 IntID = Intrinsic::ppc_ldarx;
12866 break;
12867 }
12868 Value *Call =
12869 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12870
12871 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12872}
12873
12874// Perform a store-conditional operation to Addr. Return the status of the
12875// store. This should be 0 if the store succeeded, non-zero otherwise.
12876Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12877                                               Value *Val, Value *Addr,
12878 AtomicOrdering Ord) const {
12879 Type *Ty = Val->getType();
12880 unsigned SZ = Ty->getPrimitiveSizeInBits();
12881
12882 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12883         "Only 8/16/32/64-bit atomic stores supported");
12884 Intrinsic::ID IntID;
12885 switch (SZ) {
12886 default:
12887 llvm_unreachable("Unexpected PrimitiveSize");
12888 case 8:
12889 IntID = Intrinsic::ppc_stbcx;
12890    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12891 break;
12892 case 16:
12893 IntID = Intrinsic::ppc_sthcx;
12894    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12895 break;
12896 case 32:
12897 IntID = Intrinsic::ppc_stwcx;
12898 break;
12899 case 64:
12900 IntID = Intrinsic::ppc_stdcx;
12901 break;
12902 }
12903
12904 if (SZ == 8 || SZ == 16)
12905 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
12906
12907 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
12908 /*FMFSource=*/nullptr, "stcx");
12909 return Builder.CreateXor(Call, Builder.getInt32(1));
12910}
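// Note: the st[bhwd]cx. intrinsics above report 1 when the reservation held and
// the store succeeded, while this hook returns 0 on success as described in the
// comment before emitStoreConditional; the final XOR with 1 flips between the
// two conventions.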
12911
12912// The mappings for emitLeading/TrailingFence are taken from
12913// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12914Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12915                                                 Instruction *Inst,
12916 AtomicOrdering Ord) const {
12917  if (Ord == AtomicOrdering::SequentiallyConsistent)
12918    return callIntrinsic(Builder, Intrinsic::ppc_sync);
12919 if (isReleaseOrStronger(Ord))
12920 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12921 return nullptr;
12922}
12923
12924Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12925                                                  Instruction *Inst,
12926 AtomicOrdering Ord) const {
12927 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12928 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12929 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12930 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12931 if (isa<LoadInst>(Inst))
12932 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12933 {Inst});
12934 // FIXME: Can use isync for rmw operation.
12935 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12936 }
12937 return nullptr;
12938}
12939
12940MachineBasicBlock *
12941PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12942                                    unsigned AtomicSize,
12943 unsigned BinOpcode,
12944 unsigned CmpOpcode,
12945 unsigned CmpPred) const {
12946 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12947 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12948
12949 auto LoadMnemonic = PPC::LDARX;
12950 auto StoreMnemonic = PPC::STDCX;
12951 switch (AtomicSize) {
12952 default:
12953 llvm_unreachable("Unexpected size of atomic entity");
12954 case 1:
12955 LoadMnemonic = PPC::LBARX;
12956 StoreMnemonic = PPC::STBCX;
12957 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12958 break;
12959 case 2:
12960 LoadMnemonic = PPC::LHARX;
12961 StoreMnemonic = PPC::STHCX;
12962 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12963 break;
12964 case 4:
12965 LoadMnemonic = PPC::LWARX;
12966 StoreMnemonic = PPC::STWCX;
12967 break;
12968 case 8:
12969 LoadMnemonic = PPC::LDARX;
12970 StoreMnemonic = PPC::STDCX;
12971 break;
12972 }
12973
12974 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12975 MachineFunction *F = BB->getParent();
12976  MachineFunction::iterator It = ++BB->getIterator();
12977
12978 Register dest = MI.getOperand(0).getReg();
12979 Register ptrA = MI.getOperand(1).getReg();
12980 Register ptrB = MI.getOperand(2).getReg();
12981 Register incr = MI.getOperand(3).getReg();
12982 DebugLoc dl = MI.getDebugLoc();
12983
12984 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12985 MachineBasicBlock *loop2MBB =
12986 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12987 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12988 F->insert(It, loopMBB);
12989 if (CmpOpcode)
12990 F->insert(It, loop2MBB);
12991 F->insert(It, exitMBB);
12992 exitMBB->splice(exitMBB->begin(), BB,
12993 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12994  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12995
12996 MachineRegisterInfo &RegInfo = F->getRegInfo();
12997 Register TmpReg = (!BinOpcode) ? incr :
12998 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12999 : &PPC::GPRCRegClass);
13000
13001 // thisMBB:
13002 // ...
13003 // fallthrough --> loopMBB
13004 BB->addSuccessor(loopMBB);
13005
13006 // loopMBB:
13007 // l[wd]arx dest, ptr
13008 // add r0, dest, incr
13009 // st[wd]cx. r0, ptr
13010 // bne- loopMBB
13011 // fallthrough --> exitMBB
13012
13013 // For max/min...
13014 // loopMBB:
13015 // l[wd]arx dest, ptr
13016 // cmpl?[wd] dest, incr
13017 // bgt exitMBB
13018 // loop2MBB:
13019 // st[wd]cx. dest, ptr
13020 // bne- loopMBB
13021 // fallthrough --> exitMBB
13022
13023 BB = loopMBB;
13024 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13025 .addReg(ptrA).addReg(ptrB);
13026 if (BinOpcode)
13027 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13028 if (CmpOpcode) {
13029 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13030 // Signed comparisons of byte or halfword values must be sign-extended.
13031 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13032 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13033 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13034 ExtReg).addReg(dest);
13035 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13036 } else
13037 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13038
13039 BuildMI(BB, dl, TII->get(PPC::BCC))
13040 .addImm(CmpPred)
13041 .addReg(CrReg)
13042 .addMBB(exitMBB);
13043 BB->addSuccessor(loop2MBB);
13044 BB->addSuccessor(exitMBB);
13045 BB = loop2MBB;
13046 }
13047 BuildMI(BB, dl, TII->get(StoreMnemonic))
13048 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13049 BuildMI(BB, dl, TII->get(PPC::BCC))
13050 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
13051 BB->addSuccessor(loopMBB);
13052 BB->addSuccessor(exitMBB);
13053
13054 // exitMBB:
13055 // ...
13056 BB = exitMBB;
13057 return BB;
13058}
13059
13060static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13061  switch (MI.getOpcode()) {
13062 default:
13063 return false;
13064 case PPC::COPY:
13065 return TII->isSignExtended(MI.getOperand(1).getReg(),
13066 &MI.getMF()->getRegInfo());
13067 case PPC::LHA:
13068 case PPC::LHA8:
13069 case PPC::LHAU:
13070 case PPC::LHAU8:
13071 case PPC::LHAUX:
13072 case PPC::LHAUX8:
13073 case PPC::LHAX:
13074 case PPC::LHAX8:
13075 case PPC::LWA:
13076 case PPC::LWAUX:
13077 case PPC::LWAX:
13078 case PPC::LWAX_32:
13079 case PPC::LWA_32:
13080 case PPC::PLHA:
13081 case PPC::PLHA8:
13082 case PPC::PLHA8pc:
13083 case PPC::PLHApc:
13084 case PPC::PLWA:
13085 case PPC::PLWA8:
13086 case PPC::PLWA8pc:
13087 case PPC::PLWApc:
13088 case PPC::EXTSB:
13089 case PPC::EXTSB8:
13090 case PPC::EXTSB8_32_64:
13091 case PPC::EXTSB8_rec:
13092 case PPC::EXTSB_rec:
13093 case PPC::EXTSH:
13094 case PPC::EXTSH8:
13095 case PPC::EXTSH8_32_64:
13096 case PPC::EXTSH8_rec:
13097 case PPC::EXTSH_rec:
13098 case PPC::EXTSW:
13099 case PPC::EXTSWSLI:
13100 case PPC::EXTSWSLI_32_64:
13101 case PPC::EXTSWSLI_32_64_rec:
13102 case PPC::EXTSWSLI_rec:
13103 case PPC::EXTSW_32:
13104 case PPC::EXTSW_32_64:
13105 case PPC::EXTSW_32_64_rec:
13106 case PPC::EXTSW_rec:
13107 case PPC::SRAW:
13108 case PPC::SRAWI:
13109 case PPC::SRAWI_rec:
13110 case PPC::SRAW_rec:
13111 return true;
13112 }
13113 return false;
13114}
13115
13116MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13117    MachineInstr &MI, MachineBasicBlock *BB,
13118    bool is8bit, // operation
13119 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13120 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13121 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13122
13123 // If this is a signed comparison and the value being compared is not known
13124 // to be sign extended, sign extend it here.
13125 DebugLoc dl = MI.getDebugLoc();
13126 MachineFunction *F = BB->getParent();
13127 MachineRegisterInfo &RegInfo = F->getRegInfo();
13128 Register incr = MI.getOperand(3).getReg();
13129 bool IsSignExtended =
13130 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13131
13132 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13133 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13134 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13135 .addReg(MI.getOperand(3).getReg());
13136 MI.getOperand(3).setReg(ValueReg);
13137 incr = ValueReg;
13138 }
13139 // If we support part-word atomic mnemonics, just use them
13140 if (Subtarget.hasPartwordAtomics())
13141 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13142 CmpPred);
13143
13144 // In 64 bit mode we have to use 64 bits for addresses, even though the
13145 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13146 // registers without caring whether they're 32 or 64, but here we're
13147 // doing actual arithmetic on the addresses.
13148 bool is64bit = Subtarget.isPPC64();
13149 bool isLittleEndian = Subtarget.isLittleEndian();
13150 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13151
13152 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13153  MachineFunction::iterator It = ++BB->getIterator();
13154
13155 Register dest = MI.getOperand(0).getReg();
13156 Register ptrA = MI.getOperand(1).getReg();
13157 Register ptrB = MI.getOperand(2).getReg();
13158
13159 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13160 MachineBasicBlock *loop2MBB =
13161 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13162 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13163 F->insert(It, loopMBB);
13164 if (CmpOpcode)
13165 F->insert(It, loop2MBB);
13166 F->insert(It, exitMBB);
13167 exitMBB->splice(exitMBB->begin(), BB,
13168 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13169  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13170
13171 const TargetRegisterClass *RC =
13172 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13173 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13174
13175 Register PtrReg = RegInfo.createVirtualRegister(RC);
13176 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13177 Register ShiftReg =
13178 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13179 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13180 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13181 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13182 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13183 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13184 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13185 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13186 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13187 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13188 Register Ptr1Reg;
13189 Register TmpReg =
13190 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13191
13192 // thisMBB:
13193 // ...
13194 // fallthrough --> loopMBB
13195 BB->addSuccessor(loopMBB);
13196
13197 // The 4-byte load must be aligned, while a char or short may be
13198 // anywhere in the word. Hence all this nasty bookkeeping code.
13199 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13200 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13201 // xori shift, shift1, 24 [16]
13202 // rlwinm ptr, ptr1, 0, 0, 29
13203 // slw incr2, incr, shift
13204 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13205 // slw mask, mask2, shift
13206 // loopMBB:
13207 // lwarx tmpDest, ptr
13208 // add tmp, tmpDest, incr2
13209 // andc tmp2, tmpDest, mask
13210 // and tmp3, tmp, mask
13211 // or tmp4, tmp3, tmp2
13212 // stwcx. tmp4, ptr
13213 // bne- loopMBB
13214 // fallthrough --> exitMBB
13215 // srw SrwDest, tmpDest, shift
13216 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
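  // For example, for a byte at (ptr1 & 3) == 1 the rlwinm above produces
  // shift1 = 8; on little-endian that is already the bit position of the byte
  // within the word (bits 8..15), while on big-endian the xori gives
  // shift = 8 ^ 24 = 16, since byte 1 of a big-endian word lives in bits 16..23.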
13217 if (ptrA != ZeroReg) {
13218 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13219 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13220 .addReg(ptrA)
13221 .addReg(ptrB);
13222 } else {
13223 Ptr1Reg = ptrB;
13224 }
13225  // We need to use a 32-bit subregister here to avoid a register class
13226  // mismatch in 64-bit mode.
13227 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13228 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13229 .addImm(3)
13230 .addImm(27)
13231 .addImm(is8bit ? 28 : 27);
13232 if (!isLittleEndian)
13233 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13234 .addReg(Shift1Reg)
13235 .addImm(is8bit ? 24 : 16);
13236 if (is64bit)
13237 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13238 .addReg(Ptr1Reg)
13239 .addImm(0)
13240 .addImm(61);
13241 else
13242 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13243 .addReg(Ptr1Reg)
13244 .addImm(0)
13245 .addImm(0)
13246 .addImm(29);
13247 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13248 if (is8bit)
13249 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13250 else {
13251 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13252 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13253 .addReg(Mask3Reg)
13254 .addImm(65535);
13255 }
13256 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13257 .addReg(Mask2Reg)
13258 .addReg(ShiftReg);
13259
13260 BB = loopMBB;
13261 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13262 .addReg(ZeroReg)
13263 .addReg(PtrReg);
13264 if (BinOpcode)
13265 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13266 .addReg(Incr2Reg)
13267 .addReg(TmpDestReg);
13268 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13269 .addReg(TmpDestReg)
13270 .addReg(MaskReg);
13271 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13272 if (CmpOpcode) {
13273 // For unsigned comparisons, we can directly compare the shifted values.
13274 // For signed comparisons we shift and sign extend.
13275 Register SReg = RegInfo.createVirtualRegister(GPRC);
13276 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13277 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13278 .addReg(TmpDestReg)
13279 .addReg(MaskReg);
13280 unsigned ValueReg = SReg;
13281 unsigned CmpReg = Incr2Reg;
13282 if (CmpOpcode == PPC::CMPW) {
13283 ValueReg = RegInfo.createVirtualRegister(GPRC);
13284 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13285 .addReg(SReg)
13286 .addReg(ShiftReg);
13287 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13288 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13289 .addReg(ValueReg);
13290 ValueReg = ValueSReg;
13291 CmpReg = incr;
13292 }
13293 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13294 BuildMI(BB, dl, TII->get(PPC::BCC))
13295 .addImm(CmpPred)
13296 .addReg(CrReg)
13297 .addMBB(exitMBB);
13298 BB->addSuccessor(loop2MBB);
13299 BB->addSuccessor(exitMBB);
13300 BB = loop2MBB;
13301 }
13302 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13303 BuildMI(BB, dl, TII->get(PPC::STWCX))
13304 .addReg(Tmp4Reg)
13305 .addReg(ZeroReg)
13306 .addReg(PtrReg);
13307 BuildMI(BB, dl, TII->get(PPC::BCC))
13308      .addImm(PPC::PRED_NE)
13309      .addReg(PPC::CR0)
13310 .addMBB(loopMBB);
13311 BB->addSuccessor(loopMBB);
13312 BB->addSuccessor(exitMBB);
13313
13314 // exitMBB:
13315 // ...
13316 BB = exitMBB;
13317 // Since the shift amount is not a constant, we need to clear
13318 // the upper bits with a separate RLWINM.
13319 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13320 .addReg(SrwDestReg)
13321 .addImm(0)
13322 .addImm(is8bit ? 24 : 16)
13323 .addImm(31);
13324 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13325 .addReg(TmpDestReg)
13326 .addReg(ShiftReg);
13327 return BB;
13328}
13329
13330MachineBasicBlock *
13331PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13332                                    MachineBasicBlock *MBB) const {
13333 DebugLoc DL = MI.getDebugLoc();
13334 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13335 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13336
13337 MachineFunction *MF = MBB->getParent();
13338  MachineRegisterInfo &MRI = MF->getRegInfo();
13339
13340 const BasicBlock *BB = MBB->getBasicBlock();
13341  MachineFunction::iterator I = ++MBB->getIterator();
13342
13343 Register DstReg = MI.getOperand(0).getReg();
13344 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13345 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13346 Register mainDstReg = MRI.createVirtualRegister(RC);
13347 Register restoreDstReg = MRI.createVirtualRegister(RC);
13348
13349 MVT PVT = getPointerTy(MF->getDataLayout());
13350 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13351 "Invalid Pointer Size!");
13352 // For v = setjmp(buf), we generate
13353 //
13354 // thisMBB:
13355 // SjLjSetup mainMBB
13356 // bl mainMBB
13357 // v_restore = 1
13358 // b sinkMBB
13359 //
13360 // mainMBB:
13361 // buf[LabelOffset] = LR
13362 // v_main = 0
13363 //
13364 // sinkMBB:
13365 // v = phi(main, restore)
13366 //
13367
13368 MachineBasicBlock *thisMBB = MBB;
13369 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13370 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13371 MF->insert(I, mainMBB);
13372 MF->insert(I, sinkMBB);
13373
13374  MachineInstrBuilder MIB;
13375
13376 // Transfer the remainder of BB and its successor edges to sinkMBB.
13377 sinkMBB->splice(sinkMBB->begin(), MBB,
13378 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13379  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
13380
13381 // Note that the structure of the jmp_buf used here is not compatible
13382 // with that used by libc, and is not designed to be. Specifically, it
13383 // stores only those 'reserved' registers that LLVM does not otherwise
13384 // understand how to spill. Also, by convention, by the time this
13385 // intrinsic is called, Clang has already stored the frame address in the
13386 // first slot of the buffer and stack address in the third. Following the
13387 // X86 target code, we'll store the jump address in the second slot. We also
13388 // need to save the TOC pointer (R2) to handle jumps between shared
13389 // libraries, and that will be stored in the fourth slot. The thread
13390 // identifier (R13) is not affected.
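  // In pointer-sized slots, the resulting buffer layout is therefore:
  //   buf[0] = frame address      (stored by the front end)
  //   buf[1] = jump address / IP  (LabelOffset, stored in mainMBB below)
  //   buf[2] = stack pointer      (stored by the front end; reloaded in longjmp)
  //   buf[3] = TOC pointer (R2)   (TOCOffset, 64-bit ELF only)
  //   buf[4] = base pointer       (BPOffset)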
13391
13392 // thisMBB:
13393 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13394 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13395 const int64_t BPOffset = 4 * PVT.getStoreSize();
13396
13397  // Prepare the IP in a register.
13398 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13399 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13400 Register BufReg = MI.getOperand(1).getReg();
13401
13402 if (Subtarget.is64BitELFABI()) {
13403    setUsesTOCBasePtr(*MBB->getParent());
13404    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13405 .addReg(PPC::X2)
13406 .addImm(TOCOffset)
13407 .addReg(BufReg)
13408 .cloneMemRefs(MI);
13409 }
13410
13411 // Naked functions never have a base pointer, and so we use r1. For all
13412  // other functions, this decision must be delayed until PEI.
13413 unsigned BaseReg;
13414 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13415 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13416 else
13417 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13418
13419 MIB = BuildMI(*thisMBB, MI, DL,
13420 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13421 .addReg(BaseReg)
13422 .addImm(BPOffset)
13423 .addReg(BufReg)
13424 .cloneMemRefs(MI);
13425
13426 // Setup
13427 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13428 MIB.addRegMask(TRI->getNoPreservedMask());
13429
13430 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13431
13432 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13433 .addMBB(mainMBB);
13434 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13435
13436 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13437 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13438
13439 // mainMBB:
13440 // mainDstReg = 0
13441 MIB =
13442 BuildMI(mainMBB, DL,
13443 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13444
13445 // Store IP
13446 if (Subtarget.isPPC64()) {
13447 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13448 .addReg(LabelReg)
13449 .addImm(LabelOffset)
13450 .addReg(BufReg);
13451 } else {
13452 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13453 .addReg(LabelReg)
13454 .addImm(LabelOffset)
13455 .addReg(BufReg);
13456 }
13457 MIB.cloneMemRefs(MI);
13458
13459 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13460 mainMBB->addSuccessor(sinkMBB);
13461
13462 // sinkMBB:
13463 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13464 TII->get(PPC::PHI), DstReg)
13465 .addReg(mainDstReg).addMBB(mainMBB)
13466 .addReg(restoreDstReg).addMBB(thisMBB);
13467
13468 MI.eraseFromParent();
13469 return sinkMBB;
13470}
13471
13472MachineBasicBlock *
13473PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13474                                     MachineBasicBlock *MBB) const {
13475 DebugLoc DL = MI.getDebugLoc();
13476 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13477
13478 MachineFunction *MF = MBB->getParent();
13479  MachineRegisterInfo &MRI = MF->getRegInfo();
13480
13481 MVT PVT = getPointerTy(MF->getDataLayout());
13482 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13483 "Invalid Pointer Size!");
13484
13485 const TargetRegisterClass *RC =
13486 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13487 Register Tmp = MRI.createVirtualRegister(RC);
13488 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13489 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13490 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13491 unsigned BP =
13492 (PVT == MVT::i64)
13493 ? PPC::X30
13494 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13495 : PPC::R30);
13496
13496
13497  MachineInstrBuilder MIB;
13498
13499 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13500 const int64_t SPOffset = 2 * PVT.getStoreSize();
13501 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13502 const int64_t BPOffset = 4 * PVT.getStoreSize();
13503
13504 Register BufReg = MI.getOperand(0).getReg();
13505
13506 // Reload FP (the jumped-to function may not have had a
13507 // frame pointer, and if so, then its r31 will be restored
13508 // as necessary).
13509 if (PVT == MVT::i64) {
13510 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13511 .addImm(0)
13512 .addReg(BufReg);
13513 } else {
13514 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13515 .addImm(0)
13516 .addReg(BufReg);
13517 }
13518 MIB.cloneMemRefs(MI);
13519
13520 // Reload IP
13521 if (PVT == MVT::i64) {
13522 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13523 .addImm(LabelOffset)
13524 .addReg(BufReg);
13525 } else {
13526 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13527 .addImm(LabelOffset)
13528 .addReg(BufReg);
13529 }
13530 MIB.cloneMemRefs(MI);
13531
13532 // Reload SP
13533 if (PVT == MVT::i64) {
13534 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13535 .addImm(SPOffset)
13536 .addReg(BufReg);
13537 } else {
13538 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13539 .addImm(SPOffset)
13540 .addReg(BufReg);
13541 }
13542 MIB.cloneMemRefs(MI);
13543
13544 // Reload BP
13545 if (PVT == MVT::i64) {
13546 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13547 .addImm(BPOffset)
13548 .addReg(BufReg);
13549 } else {
13550 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13551 .addImm(BPOffset)
13552 .addReg(BufReg);
13553 }
13554 MIB.cloneMemRefs(MI);
13555
13556 // Reload TOC
13557 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13558    setUsesTOCBasePtr(*MBB->getParent());
13559    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13560 .addImm(TOCOffset)
13561 .addReg(BufReg)
13562 .cloneMemRefs(MI);
13563 }
13564
13565 // Jump
13566 BuildMI(*MBB, MI, DL,
13567 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13568 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13569
13570 MI.eraseFromParent();
13571 return MBB;
13572}
13573
13574bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13575  // If the function specifically requests inline stack probes, emit them.
13576 if (MF.getFunction().hasFnAttribute("probe-stack"))
13577 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13578 "inline-asm";
13579 return false;
13580}
13581
13582unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13583  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13584 unsigned StackAlign = TFI->getStackAlignment();
13585 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13586 "Unexpected stack alignment");
13587 // The default stack probe size is 4096 if the function has no
13588 // stack-probe-size attribute.
13589 const Function &Fn = MF.getFunction();
13590 unsigned StackProbeSize =
13591 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13592 // Round down to the stack alignment.
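  // For example, with a 16-byte stack alignment a "stack-probe-size"="1000"
  // attribute is rounded down to 992; a value smaller than the alignment
  // rounds to 0, in which case the alignment itself is returned below.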
13593 StackProbeSize &= ~(StackAlign - 1);
13594 return StackProbeSize ? StackProbeSize : StackAlign;
13595}
13596
13597// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13598// into three phases. In the first phase, it uses the pseudo instruction
13599// PREPARE_PROBED_ALLOCA to get the future results of the actual FramePointer
13600// and FinalStackPtr. In the second phase, it generates a loop for probing
13601// blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
13602// future MaxCallFrameSize so that it can calculate the correct data area pointer.
13603MachineBasicBlock *
13604PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13605                                    MachineBasicBlock *MBB) const {
13606 const bool isPPC64 = Subtarget.isPPC64();
13607 MachineFunction *MF = MBB->getParent();
13608 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13609 DebugLoc DL = MI.getDebugLoc();
13610 const unsigned ProbeSize = getStackProbeSize(*MF);
13611 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13612  MachineRegisterInfo &MRI = MF->getRegInfo();
13613  // The CFG of the stack probing code looks as follows:
13614 // +-----+
13615 // | MBB |
13616 // +--+--+
13617 // |
13618 // +----v----+
13619 // +--->+ TestMBB +---+
13620 // | +----+----+ |
13621 // | | |
13622 // | +-----v----+ |
13623 // +---+ BlockMBB | |
13624 // +----------+ |
13625 // |
13626 // +---------+ |
13627 // | TailMBB +<--+
13628 // +---------+
13629 // In MBB, calculate previous frame pointer and final stack pointer.
13630 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13631 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13632 // TailMBB is spliced via \p MI.
13633 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13634 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13635 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13636
13637  MachineFunction::iterator MBBIter = ++MBB->getIterator();
13638  MF->insert(MBBIter, TestMBB);
13639 MF->insert(MBBIter, BlockMBB);
13640 MF->insert(MBBIter, TailMBB);
13641
13642 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13643 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13644
13645 Register DstReg = MI.getOperand(0).getReg();
13646 Register NegSizeReg = MI.getOperand(1).getReg();
13647 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13648 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13649 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13650 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13651
13652  // Since the value of NegSizeReg might be realigned during prologue/epilogue
13653  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13654  // actual FramePointer and NegSize.
13655 unsigned ProbeOpc;
13656 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13657 ProbeOpc =
13658 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13659 else
13660    // When NegSizeReg has only one use (the current MI, which is about to be
13661    // replaced by the PREPARE_PROBED_ALLOCA pseudo), use the NEGSIZE_SAME_REG
13662    // variant so that ActualNegSizeReg and NegSizeReg are allocated to the same
13663    // physical register and the redundant copy is avoided.
13664 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13665 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13666 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13667 .addDef(ActualNegSizeReg)
13668 .addReg(NegSizeReg)
13669 .add(MI.getOperand(2))
13670 .add(MI.getOperand(3));
13671
13672  // Calculate the final stack pointer, which equals SP + ActualNegSize.
13673 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13674 FinalStackPtr)
13675 .addReg(SPReg)
13676 .addReg(ActualNegSizeReg);
13677
13678 // Materialize a scratch register for update.
13679 int64_t NegProbeSize = -(int64_t)ProbeSize;
13680 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13681 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13682 if (!isInt<16>(NegProbeSize)) {
13683 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13684 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13685 .addImm(NegProbeSize >> 16);
13686 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13687 ScratchReg)
13688 .addReg(TempReg)
13689 .addImm(NegProbeSize & 0xFFFF);
13690 } else
13691 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13692 .addImm(NegProbeSize);
13693
13694 {
13695 // Probing leading residual part.
13696 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13697 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13698 .addReg(ActualNegSizeReg)
13699 .addReg(ScratchReg);
13700 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13701 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13702 .addReg(Div)
13703 .addReg(ScratchReg);
13704 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13705 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13706 .addReg(Mul)
13707 .addReg(ActualNegSizeReg);
13708 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13709 .addReg(FramePointer)
13710 .addReg(SPReg)
13711 .addReg(NegMod);
13712 }
13713
13714 {
13715 // Remaining part should be multiple of ProbeSize.
13716 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13717 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13718 .addReg(SPReg)
13719 .addReg(FinalStackPtr);
13720 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13721        .addImm(PPC::PRED_EQ)
13722        .addReg(CmpResult)
13723 .addMBB(TailMBB);
13724 TestMBB->addSuccessor(BlockMBB);
13725 TestMBB->addSuccessor(TailMBB);
13726 }
13727
13728 {
13729 // Touch the block.
13730 // |P...|P...|P...
13731 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13732 .addReg(FramePointer)
13733 .addReg(SPReg)
13734 .addReg(ScratchReg);
13735 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13736 BlockMBB->addSuccessor(TestMBB);
13737 }
13738
13739  // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion,
13740  // so use the DYNAREAOFFSET pseudo instruction to get the future result.
13741 Register MaxCallFrameSizeReg =
13742 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13743 BuildMI(TailMBB, DL,
13744 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13745 MaxCallFrameSizeReg)
13746 .add(MI.getOperand(2))
13747 .add(MI.getOperand(3));
13748 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13749 .addReg(SPReg)
13750 .addReg(MaxCallFrameSizeReg);
13751
13752 // Splice instructions after MI to TailMBB.
13753 TailMBB->splice(TailMBB->end(), MBB,
13754 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13755  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13756  MBB->addSuccessor(TestMBB);
13757
13758 // Delete the pseudo instruction.
13759 MI.eraseFromParent();
13760
13761 ++NumDynamicAllocaProbed;
13762 return TailMBB;
13763}
13764
13765static bool IsSelectCC(MachineInstr &MI) {
13766  switch (MI.getOpcode()) {
13767 case PPC::SELECT_CC_I4:
13768 case PPC::SELECT_CC_I8:
13769 case PPC::SELECT_CC_F4:
13770 case PPC::SELECT_CC_F8:
13771 case PPC::SELECT_CC_F16:
13772 case PPC::SELECT_CC_VRRC:
13773 case PPC::SELECT_CC_VSFRC:
13774 case PPC::SELECT_CC_VSSRC:
13775 case PPC::SELECT_CC_VSRC:
13776 case PPC::SELECT_CC_SPE4:
13777 case PPC::SELECT_CC_SPE:
13778 return true;
13779 default:
13780 return false;
13781 }
13782}
13783
13784static bool IsSelect(MachineInstr &MI) {
13785 switch (MI.getOpcode()) {
13786 case PPC::SELECT_I4:
13787 case PPC::SELECT_I8:
13788 case PPC::SELECT_F4:
13789 case PPC::SELECT_F8:
13790 case PPC::SELECT_F16:
13791 case PPC::SELECT_SPE:
13792 case PPC::SELECT_SPE4:
13793 case PPC::SELECT_VRRC:
13794 case PPC::SELECT_VSFRC:
13795 case PPC::SELECT_VSSRC:
13796 case PPC::SELECT_VSRC:
13797 return true;
13798 default:
13799 return false;
13800 }
13801}
13802
13803MachineBasicBlock *
13804PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13805                                               MachineBasicBlock *BB) const {
13806 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13807 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13808 if (Subtarget.is64BitELFABI() &&
13809 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13810 !Subtarget.isUsingPCRelativeCalls()) {
13811 // Call lowering should have added an r2 operand to indicate a dependence
13812      // on the TOC base pointer value. It can't, however, because there is no
13813 // way to mark the dependence as implicit there, and so the stackmap code
13814 // will confuse it with a regular operand. Instead, add the dependence
13815 // here.
13816 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13817 }
13818
13819 return emitPatchPoint(MI, BB);
13820 }
13821
13822 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13823 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13824 return emitEHSjLjSetJmp(MI, BB);
13825 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13826 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13827 return emitEHSjLjLongJmp(MI, BB);
13828 }
13829
13830 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13831
13832 // To "insert" these instructions we actually have to insert their
13833 // control-flow patterns.
13834 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13835  MachineFunction::iterator It = ++BB->getIterator();
13836
13837 MachineFunction *F = BB->getParent();
13838 MachineRegisterInfo &MRI = F->getRegInfo();
13839
13840 if (Subtarget.hasISEL() &&
13841 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13842 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13843 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13844    SmallVector<MachineOperand, 2> Cond;
13845    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13846 MI.getOpcode() == PPC::SELECT_CC_I8)
13847 Cond.push_back(MI.getOperand(4));
13848 else
13849      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13850    Cond.push_back(MI.getOperand(1));
13851
13852 DebugLoc dl = MI.getDebugLoc();
13853 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13854 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13855 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13856 // The incoming instruction knows the destination vreg to set, the
13857 // condition code register to branch on, the true/false values to
13858 // select between, and a branch opcode to use.
13859
13860 // thisMBB:
13861 // ...
13862 // TrueVal = ...
13863 // cmpTY ccX, r1, r2
13864 // bCC sinkMBB
13865 // fallthrough --> copy0MBB
13866 MachineBasicBlock *thisMBB = BB;
13867 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13868 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13869 DebugLoc dl = MI.getDebugLoc();
13870 F->insert(It, copy0MBB);
13871 F->insert(It, sinkMBB);
13872
13873 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13874 copy0MBB->addLiveIn(PPC::CARRY);
13875 sinkMBB->addLiveIn(PPC::CARRY);
13876 }
13877
13878 // Set the call frame size on entry to the new basic blocks.
13879 // See https://reviews.llvm.org/D156113.
13880 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13881 copy0MBB->setCallFrameSize(CallFrameSize);
13882 sinkMBB->setCallFrameSize(CallFrameSize);
13883
13884 // Transfer the remainder of BB and its successor edges to sinkMBB.
13885 sinkMBB->splice(sinkMBB->begin(), BB,
13886 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13887    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13888
13889 // Next, add the true and fallthrough blocks as its successors.
13890 BB->addSuccessor(copy0MBB);
13891 BB->addSuccessor(sinkMBB);
13892
13893 if (IsSelect(MI)) {
13894 BuildMI(BB, dl, TII->get(PPC::BC))
13895 .addReg(MI.getOperand(1).getReg())
13896 .addMBB(sinkMBB);
13897 } else {
13898 unsigned SelectPred = MI.getOperand(4).getImm();
13899 BuildMI(BB, dl, TII->get(PPC::BCC))
13900 .addImm(SelectPred)
13901 .addReg(MI.getOperand(1).getReg())
13902 .addMBB(sinkMBB);
13903 }
13904
13905 // copy0MBB:
13906 // %FalseValue = ...
13907 // # fallthrough to sinkMBB
13908 BB = copy0MBB;
13909
13910 // Update machine-CFG edges
13911 BB->addSuccessor(sinkMBB);
13912
13913 // sinkMBB:
13914 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13915 // ...
13916 BB = sinkMBB;
13917 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13918 .addReg(MI.getOperand(3).getReg())
13919 .addMBB(copy0MBB)
13920 .addReg(MI.getOperand(2).getReg())
13921 .addMBB(thisMBB);
13922 } else if (MI.getOpcode() == PPC::ReadTB) {
13923 // To read the 64-bit time-base register on a 32-bit target, we read the
13924 // two halves. Should the counter have wrapped while it was being read, we
13925 // need to try again.
13926 // ...
13927 // readLoop:
13928 // mfspr Rx,TBU # load from TBU
13929 // mfspr Ry,TB # load from TB
13930 // mfspr Rz,TBU # load from TBU
13931 // cmpw crX,Rx,Rz # check if 'old'='new'
13932 // bne readLoop # branch if they're not equal
13933 // ...
13934
13935 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13936 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13937 DebugLoc dl = MI.getDebugLoc();
13938 F->insert(It, readMBB);
13939 F->insert(It, sinkMBB);
13940
13941 // Transfer the remainder of BB and its successor edges to sinkMBB.
13942 sinkMBB->splice(sinkMBB->begin(), BB,
13943 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13944    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13945
13946 BB->addSuccessor(readMBB);
13947 BB = readMBB;
13948
13949 MachineRegisterInfo &RegInfo = F->getRegInfo();
13950 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13951 Register LoReg = MI.getOperand(0).getReg();
13952 Register HiReg = MI.getOperand(1).getReg();
13953
13954 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13955 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13956 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13957
13958 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13959
13960 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13961 .addReg(HiReg)
13962 .addReg(ReadAgainReg);
13963 BuildMI(BB, dl, TII->get(PPC::BCC))
13964        .addImm(PPC::PRED_NE)
13965        .addReg(CmpReg)
13966 .addMBB(readMBB);
13967
13968 BB->addSuccessor(readMBB);
13969 BB->addSuccessor(sinkMBB);
13970 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13971 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13972 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13973 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13974 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13975 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13976 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13977 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13978
13979 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13980 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13981 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13982 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13983 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13984 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13985 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13986 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13987
13988 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13989 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13990 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13991 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13992 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13993 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13994 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13995 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13996
13997 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13998 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13999 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14000 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
14001 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14002 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14003 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14004 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14005
14006 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14007 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14008 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14009 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14010 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14011 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14012 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14013 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14014
14015 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14016 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14017 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14018 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14019 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14020 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14021 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14022 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14023
14024 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14025 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14026 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14027 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14028 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14029 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14030 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14031 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14032
14033 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14034 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14035 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14036 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14037 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14038 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14039 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14040 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14041
14042 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14043 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14044 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14045 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14046 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14047 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14048 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14049 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14050
14051 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14052 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14053 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14054 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14055 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14056 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14057 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14058 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14059
14060 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14061 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14062 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14063 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14064 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14065 BB = EmitAtomicBinary(MI, BB, 4, 0);
14066 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14067 BB = EmitAtomicBinary(MI, BB, 8, 0);
14068 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14069 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14070 (Subtarget.hasPartwordAtomics() &&
14071 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14072 (Subtarget.hasPartwordAtomics() &&
14073 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14074 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14075
14076 auto LoadMnemonic = PPC::LDARX;
14077 auto StoreMnemonic = PPC::STDCX;
14078 switch (MI.getOpcode()) {
14079 default:
14080 llvm_unreachable("Compare and swap of unknown size");
14081 case PPC::ATOMIC_CMP_SWAP_I8:
14082 LoadMnemonic = PPC::LBARX;
14083 StoreMnemonic = PPC::STBCX;
14084      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14085 break;
14086 case PPC::ATOMIC_CMP_SWAP_I16:
14087 LoadMnemonic = PPC::LHARX;
14088 StoreMnemonic = PPC::STHCX;
14089      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14090 break;
14091 case PPC::ATOMIC_CMP_SWAP_I32:
14092 LoadMnemonic = PPC::LWARX;
14093 StoreMnemonic = PPC::STWCX;
14094 break;
14095 case PPC::ATOMIC_CMP_SWAP_I64:
14096 LoadMnemonic = PPC::LDARX;
14097 StoreMnemonic = PPC::STDCX;
14098 break;
14099 }
14100 MachineRegisterInfo &RegInfo = F->getRegInfo();
14101 Register dest = MI.getOperand(0).getReg();
14102 Register ptrA = MI.getOperand(1).getReg();
14103 Register ptrB = MI.getOperand(2).getReg();
14104 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14105 Register oldval = MI.getOperand(3).getReg();
14106 Register newval = MI.getOperand(4).getReg();
14107 DebugLoc dl = MI.getDebugLoc();
14108
14109 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14110 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14111 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14112 F->insert(It, loop1MBB);
14113 F->insert(It, loop2MBB);
14114 F->insert(It, exitMBB);
14115 exitMBB->splice(exitMBB->begin(), BB,
14116 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14117 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14118
14119 // thisMBB:
14120 // ...
14121 // fallthrough --> loopMBB
14122 BB->addSuccessor(loop1MBB);
14123
14124 // loop1MBB:
14125 // l[bhwd]arx dest, ptr
14126 // cmp[wd] dest, oldval
14127 // bne- exitBB
14128 // loop2MBB:
14129 // st[bhwd]cx. newval, ptr
14130 // bne- loopMBB
14131 // b exitBB
14132 // exitBB:
14133 BB = loop1MBB;
14134 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14135 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14136 .addReg(dest)
14137 .addReg(oldval);
14138 BuildMI(BB, dl, TII->get(PPC::BCC))
14139 .addImm(PPC::PRED_NE)
14140 .addReg(CrReg)
14141 .addMBB(exitMBB);
14142 BB->addSuccessor(loop2MBB);
14143 BB->addSuccessor(exitMBB);
14144
14145 BB = loop2MBB;
14146 BuildMI(BB, dl, TII->get(StoreMnemonic))
14147 .addReg(newval)
14148 .addReg(ptrA)
14149 .addReg(ptrB);
14150 BuildMI(BB, dl, TII->get(PPC::BCC))
14151 .addImm(PPC::PRED_NE)
14152 .addReg(PPC::CR0)
14153 .addMBB(loop1MBB);
14154 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14155 BB->addSuccessor(loop1MBB);
14156 BB->addSuccessor(exitMBB);
14157
14158 // exitMBB:
14159 // ...
14160 BB = exitMBB;
14161 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14162 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14163 // We must use 64-bit registers for addresses when targeting 64-bit,
14164 // since we're actually doing arithmetic on them. Other registers
14165 // can be 32-bit.
14166 bool is64bit = Subtarget.isPPC64();
14167 bool isLittleEndian = Subtarget.isLittleEndian();
14168 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14169
14170 Register dest = MI.getOperand(0).getReg();
14171 Register ptrA = MI.getOperand(1).getReg();
14172 Register ptrB = MI.getOperand(2).getReg();
14173 Register oldval = MI.getOperand(3).getReg();
14174 Register newval = MI.getOperand(4).getReg();
14175 DebugLoc dl = MI.getDebugLoc();
14176
14177 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14178 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14179 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14180 F->insert(It, loop1MBB);
14181 F->insert(It, loop2MBB);
14182 F->insert(It, exitMBB);
14183 exitMBB->splice(exitMBB->begin(), BB,
14184 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14185 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14186
14187 MachineRegisterInfo &RegInfo = F->getRegInfo();
14188 const TargetRegisterClass *RC =
14189 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14190 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14191
14192 Register PtrReg = RegInfo.createVirtualRegister(RC);
14193 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14194 Register ShiftReg =
14195 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14196 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14197 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14198 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14199 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14200 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14201 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14202 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14203 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14204 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14205 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14206 Register Ptr1Reg;
14207 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14208 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14209 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14210 // thisMBB:
14211 // ...
14212 // fallthrough --> loopMBB
14213 BB->addSuccessor(loop1MBB);
14214
14215 // The 4-byte load must be aligned, while a char or short may be
14216 // anywhere in the word. Hence all this nasty bookkeeping code.
14217 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14218 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14219 // xori shift, shift1, 24 [16]
14220 // rlwinm ptr, ptr1, 0, 0, 29
14221 // slw newval2, newval, shift
14222 // slw oldval2, oldval, shift
14223 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14224 // slw mask, mask2, shift
14225 // and newval3, newval2, mask
14226 // and oldval3, oldval2, mask
14227 // loop1MBB:
14228 // lwarx tmpDest, ptr
14229 // and tmp, tmpDest, mask
14230 // cmpw tmp, oldval3
14231 // bne- exitBB
14232 // loop2MBB:
14233 // andc tmp2, tmpDest, mask
14234 // or tmp4, tmp2, newval3
14235 // stwcx. tmp4, ptr
14236 // bne- loop1MBB
14237 // b exitBB
14238 // exitBB:
14239 // srw dest, tmpDest, shift
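// For example, for an i8 compare-and-swap whose address ends in 0b10 (byte 2
// of its aligned word), rlwinm gives shift1 = 2*8 = 16. On little-endian the
// byte occupies bits 23:16 of the loaded word, so shift = shift1 = 16; on
// big-endian it occupies bits 15:8, so shift = 16 ^ 24 = 8. mask = 0xFF << shift
// (0xFFFF for the i16 case) then isolates exactly that byte within the word.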
14240 if (ptrA != ZeroReg) {
14241 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14242 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14243 .addReg(ptrA)
14244 .addReg(ptrB);
14245 } else {
14246 Ptr1Reg = ptrB;
14247 }
14248
14249 // We need to use a 32-bit subregister here to avoid a register class
14250 // mismatch in 64-bit mode.
14251 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14252 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
14253 .addImm(3)
14254 .addImm(27)
14255 .addImm(is8bit ? 28 : 27);
14256 if (!isLittleEndian)
14257 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14258 .addReg(Shift1Reg)
14259 .addImm(is8bit ? 24 : 16);
14260 if (is64bit)
14261 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14262 .addReg(Ptr1Reg)
14263 .addImm(0)
14264 .addImm(61);
14265 else
14266 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14267 .addReg(Ptr1Reg)
14268 .addImm(0)
14269 .addImm(0)
14270 .addImm(29);
14271 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14272 .addReg(newval)
14273 .addReg(ShiftReg);
14274 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14275 .addReg(oldval)
14276 .addReg(ShiftReg);
14277 if (is8bit)
14278 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14279 else {
14280 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14281 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14282 .addReg(Mask3Reg)
14283 .addImm(65535);
14284 }
14285 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14286 .addReg(Mask2Reg)
14287 .addReg(ShiftReg);
14288 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14289 .addReg(NewVal2Reg)
14290 .addReg(MaskReg);
14291 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14292 .addReg(OldVal2Reg)
14293 .addReg(MaskReg);
14294
14295 BB = loop1MBB;
14296 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14297 .addReg(ZeroReg)
14298 .addReg(PtrReg);
14299 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14300 .addReg(TmpDestReg)
14301 .addReg(MaskReg);
14302 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14303 .addReg(TmpReg)
14304 .addReg(OldVal3Reg);
14305 BuildMI(BB, dl, TII->get(PPC::BCC))
14306 .addImm(PPC::PRED_NE)
14307 .addReg(CrReg)
14308 .addMBB(exitMBB);
14309 BB->addSuccessor(loop2MBB);
14310 BB->addSuccessor(exitMBB);
14311
14312 BB = loop2MBB;
14313 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14314 .addReg(TmpDestReg)
14315 .addReg(MaskReg);
14316 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14317 .addReg(Tmp2Reg)
14318 .addReg(NewVal3Reg);
14319 BuildMI(BB, dl, TII->get(PPC::STWCX))
14320 .addReg(Tmp4Reg)
14321 .addReg(ZeroReg)
14322 .addReg(PtrReg);
14323 BuildMI(BB, dl, TII->get(PPC::BCC))
14324 .addImm(PPC::PRED_NE)
14325 .addReg(PPC::CR0)
14326 .addMBB(loop1MBB);
14327 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14328 BB->addSuccessor(loop1MBB);
14329 BB->addSuccessor(exitMBB);
14330
14331 // exitMBB:
14332 // ...
14333 BB = exitMBB;
14334 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14335 .addReg(TmpReg)
14336 .addReg(ShiftReg);
14337 } else if (MI.getOpcode() == PPC::FADDrtz) {
14338 // This pseudo performs an FADD with rounding mode temporarily forced
14339 // to round-to-zero. We emit this via custom inserter since the FPSCR
14340 // is not modeled at the SelectionDAG level.
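// In effect the emitted sequence is:
//   mffs   old              ; save FPSCR
//   mtfsb1 31               ; set FPSCR[63]
//   mtfsb0 30               ; clear FPSCR[62]  -> RN = 0b01, round toward zero
//   fadd   dest, src1, src2
//   mtfsf  1, old           ; restore the rounding-mode field (field 7) from old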
14341 Register Dest = MI.getOperand(0).getReg();
14342 Register Src1 = MI.getOperand(1).getReg();
14343 Register Src2 = MI.getOperand(2).getReg();
14344 DebugLoc dl = MI.getDebugLoc();
14345
14346 MachineRegisterInfo &RegInfo = F->getRegInfo();
14347 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14348
14349 // Save FPSCR value.
14350 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14351
14352 // Set rounding mode to round-to-zero.
14353 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14354 .addImm(31)
14355 .addReg(PPC::RM, RegState::ImplicitDefine);
14356
14357 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14358 .addImm(30)
14359 .addReg(PPC::RM, RegState::ImplicitDefine);
14360
14361 // Perform addition.
14362 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14363 .addReg(Src1)
14364 .addReg(Src2);
14365 if (MI.getFlag(MachineInstr::NoFPExcept))
14366 MIB.setFlag(MachineInstr::NoFPExcept);
14367
14368 // Restore FPSCR value.
14369 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14370 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14371 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14372 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14373 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14374 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14375 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14376 ? PPC::ANDI8_rec
14377 : PPC::ANDI_rec;
14378 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14379 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14380
14381 MachineRegisterInfo &RegInfo = F->getRegInfo();
14382 Register Dest = RegInfo.createVirtualRegister(
14383 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14384
14385 DebugLoc Dl = MI.getDebugLoc();
14386 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14387 .addReg(MI.getOperand(1).getReg())
14388 .addImm(1);
14389 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14390 MI.getOperand(0).getReg())
14391 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14392 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14393 DebugLoc Dl = MI.getDebugLoc();
14394 MachineRegisterInfo &RegInfo = F->getRegInfo();
14395 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14396 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14397 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14398 MI.getOperand(0).getReg())
14399 .addReg(CRReg);
14400 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14401 DebugLoc Dl = MI.getDebugLoc();
14402 unsigned Imm = MI.getOperand(1).getImm();
14403 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14404 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14405 MI.getOperand(0).getReg())
14406 .addReg(PPC::CR0EQ);
14407 } else if (MI.getOpcode() == PPC::SETRNDi) {
14408 DebugLoc dl = MI.getDebugLoc();
14409 Register OldFPSCRReg = MI.getOperand(0).getReg();
14410
14411 // Save FPSCR value.
14412 if (MRI.use_empty(OldFPSCRReg))
14413 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14414 else
14415 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14416
14417 // The floating-point rounding mode is in bits 62:63 of FPSCR, and has
14418 // the following settings:
14419 // 00 Round to nearest
14420 // 01 Round to 0
14421 // 10 Round to +inf
14422 // 11 Round to -inf
14423
14424 // When the operand is an immediate, use its two least significant bits to
14425 // set bits 62:63 of FPSCR.
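// For example, an operand of 2 (round to +inf) emits mtfsb0 31 (clear FPSCR
// bit 63) followed by mtfsb1 30 (set FPSCR bit 62), leaving RN = 0b10.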
14426 unsigned Mode = MI.getOperand(1).getImm();
14427 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14428 .addImm(31)
14429 .addReg(PPC::RM, RegState::ImplicitDefine);
14430
14431 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14432 .addImm(30)
14433 .addReg(PPC::RM, RegState::ImplicitDefine);
14434 } else if (MI.getOpcode() == PPC::SETRND) {
14435 DebugLoc dl = MI.getDebugLoc();
14436
14437 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14438 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14439 // If the target doesn't have DirectMove, we have to go through the stack,
14440 // because the target lacks instructions like mtvsrd or mfvsrd to do the
14441 // conversion directly.
14442 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14443 if (Subtarget.hasDirectMove()) {
14444 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14445 .addReg(SrcReg);
14446 } else {
14447 // Use stack to do the register copy.
14448 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14449 MachineRegisterInfo &RegInfo = F->getRegInfo();
14450 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14451 if (RC == &PPC::F8RCRegClass) {
14452 // Copy register from F8RCRegClass to G8RCRegclass.
14453 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14454 "Unsupported RegClass.");
14455
14456 StoreOp = PPC::STFD;
14457 LoadOp = PPC::LD;
14458 } else {
14459 // Copy register from G8RCRegClass to F8RCRegclass.
14460 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14461 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14462 "Unsupported RegClass.");
14463 }
14464
14465 MachineFrameInfo &MFI = F->getFrameInfo();
14466 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14467
14468 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14469 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14470 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14471 MFI.getObjectAlign(FrameIdx));
14472
14473 // Store the SrcReg into the stack.
14474 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14475 .addReg(SrcReg)
14476 .addImm(0)
14477 .addFrameIndex(FrameIdx)
14478 .addMemOperand(MMOStore);
14479
14480 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14481 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14482 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14483 MFI.getObjectAlign(FrameIdx));
14484
14485 // Load from the stack where SrcReg is stored, and save to DestReg,
14486 // so we have done the RegClass conversion from RegClass::SrcReg to
14487 // RegClass::DestReg.
14488 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14489 .addImm(0)
14490 .addFrameIndex(FrameIdx)
14491 .addMemOperand(MMOLoad);
14492 }
14493 };
14494
14495 Register OldFPSCRReg = MI.getOperand(0).getReg();
14496
14497 // Save FPSCR value.
14498 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14499
14500 // When the operand is a gprc register, use its two least significant bits
14501 // and the mtfsf instruction to set bits 62:63 of FPSCR.
14502 //
14503 // copy OldFPSCRTmpReg, OldFPSCRReg
14504 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14505 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14506 // copy NewFPSCRReg, NewFPSCRTmpReg
14507 // mtfsf 255, NewFPSCRReg
14508 MachineOperand SrcOp = MI.getOperand(1);
14509 MachineRegisterInfo &RegInfo = F->getRegInfo();
14510 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14511
14512 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14513
14514 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14515 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14516
14517 // The first operand of INSERT_SUBREG should be a register that has
14518 // subregisters; since we only care about its RegClass, we use an
14519 // IMPLICIT_DEF register.
14520 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14521 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14522 .addReg(ImDefReg)
14523 .add(SrcOp)
14524 .addImm(1);
14525
14526 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14527 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14528 .addReg(OldFPSCRTmpReg)
14529 .addReg(ExtSrcReg)
14530 .addImm(0)
14531 .addImm(62);
14532
14533 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14534 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14535
14536 // The mask 255 means that bits 32:63 of NewFPSCRReg are copied into bits
14537 // 32:63 of FPSCR.
14538 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14539 .addImm(255)
14540 .addReg(NewFPSCRReg)
14541 .addImm(0)
14542 .addImm(0);
14543 } else if (MI.getOpcode() == PPC::SETFLM) {
14544 DebugLoc Dl = MI.getDebugLoc();
14545
14546 // The result of setflm is the previous FPSCR content, so save it first.
14547 Register OldFPSCRReg = MI.getOperand(0).getReg();
14548 if (MRI.use_empty(OldFPSCRReg))
14549 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14550 else
14551 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14552
14553 // Put bits 32:63 into FPSCR.
14554 Register NewFPSCRReg = MI.getOperand(1).getReg();
14555 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14556 .addImm(255)
14557 .addReg(NewFPSCRReg)
14558 .addImm(0)
14559 .addImm(0);
14560 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14561 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14562 return emitProbedAlloca(MI, BB);
14563 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14564 DebugLoc DL = MI.getDebugLoc();
14565 Register Src = MI.getOperand(2).getReg();
14566 Register Lo = MI.getOperand(0).getReg();
14567 Register Hi = MI.getOperand(1).getReg();
14568 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14569 .addDef(Lo)
14570 .addUse(Src, 0, PPC::sub_gp8_x1);
14571 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14572 .addDef(Hi)
14573 .addUse(Src, 0, PPC::sub_gp8_x0);
14574 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14575 MI.getOpcode() == PPC::STQX_PSEUDO) {
14576 DebugLoc DL = MI.getDebugLoc();
14577 // Ptr is used as the ptr_rc_no_r0 part
14578 // of LQ/STQ's memory operand and holds the result of adding RA and RB,
14579 // so it has to be g8rc_and_g8rc_nox0.
14580 Register Ptr =
14581 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14582 Register Val = MI.getOperand(0).getReg();
14583 Register RA = MI.getOperand(1).getReg();
14584 Register RB = MI.getOperand(2).getReg();
14585 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14586 BuildMI(*BB, MI, DL,
14587 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14588 : TII->get(PPC::STQ))
14589 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14590 .addImm(0)
14591 .addReg(Ptr);
14592 } else {
14593 llvm_unreachable("Unexpected instr type to insert");
14594 }
14595
14596 MI.eraseFromParent(); // The pseudo instruction is gone now.
14597 return BB;
14598}
14599
14600//===----------------------------------------------------------------------===//
14601// Target Optimization Hooks
14602//===----------------------------------------------------------------------===//
14603
14604static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14605 // For the estimates, convergence is quadratic, so we essentially double the
14606 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14607 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14608 // this is 2^-14. IEEE float has 23 mantissa bits and double has 52.
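// For example, with hasRecipPrec() the initial estimate is good to ~2^-14, so
// one Newton-Raphson step reaches ~2^-28 (enough for f32) and the extra step
// added for f64 reaches ~2^-56 (covering the 52-bit mantissa). Without
// hasRecipPrec(), starting from ~2^-5 takes 3 steps for f32 and 4 for f64.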
14609 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14610 if (VT.getScalarType() == MVT::f64)
14611 RefinementSteps++;
14612 return RefinementSteps;
14613}
14614
14615SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14616 const DenormalMode &Mode) const {
14617 // We only have VSX Vector Test for software Square Root.
14618 EVT VT = Op.getValueType();
14619 if (!isTypeLegal(MVT::i1) ||
14620 (VT != MVT::f64 &&
14621 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14622 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14623
14624 SDLoc DL(Op);
14626 // The output register of FTSQRT is a CR field.
14626 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14627 // ftsqrt BF,FRB
14628 // Let e_b be the unbiased exponent of the double-precision
14629 // floating-point operand in register FRB.
14630 // fe_flag is set to 1 if either of the following conditions occurs.
14631 // - The double-precision floating-point operand in register FRB is a zero,
14632 // a NaN, an infinity, or a negative value.
14633 // - e_b is less than or equal to -970.
14634 // Otherwise fe_flag is set to 0.
14635 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14636 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14637 // exponent is less than -970)
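// For example, a well-scaled positive input such as 4.0 leaves the EQ bit
// clear (eligible for the Newton-Raphson iteration), while 0.0, -1.0,
// infinities, and NaNs set it.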
14638 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14639 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14640 FTSQRT, SRIdxVal),
14641 0);
14642}
14643
14644SDValue
14645PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14646 SelectionDAG &DAG) const {
14647 // We only have VSX Vector Square Root.
14648 EVT VT = Op.getValueType();
14649 if (VT != MVT::f64 &&
14650 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14651 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14652
14653 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14654}
14655
14656SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14657 int Enabled, int &RefinementSteps,
14658 bool &UseOneConstNR,
14659 bool Reciprocal) const {
14660 EVT VT = Operand.getValueType();
14661 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14662 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14663 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14664 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14665 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14666 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14667
14668 // The Newton-Raphson computation with a single constant does not provide
14669 // enough accuracy on some CPUs.
14670 UseOneConstNR = !Subtarget.needsTwoConstNR();
14671 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14672 }
14673 return SDValue();
14674}
14675
14676SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14677 int Enabled,
14678 int &RefinementSteps) const {
14679 EVT VT = Operand.getValueType();
14680 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14681 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14682 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14683 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14684 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14685 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14686 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14687 }
14688 return SDValue();
14689}
14690
14691unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14692 // Note: This functionality is used only when unsafe-fp-math is enabled, and
14693 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
14694 // enabled for division), this functionality is redundant with the default
14695 // combiner logic (once the division -> reciprocal/multiply transformation
14696 // has taken place). As a result, this matters more for older cores than for
14697 // newer ones.
14698
14699 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14700 // reciprocal if there are two or more FDIVs (for embedded cores with only
14701 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
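// For example, on a generic out-of-order core (threshold 3) x/d + y/d is left
// as two divides, while x/d + y/d + z/d becomes r = 1.0/d; x*r + y*r + z*r
// once the unsafe-fp reciprocal transforms are enabled.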
14702 switch (Subtarget.getCPUDirective()) {
14703 default:
14704 return 3;
14705 case PPC::DIR_440:
14706 case PPC::DIR_A2:
14707 case PPC::DIR_E500:
14708 case PPC::DIR_E500mc:
14709 case PPC::DIR_E5500:
14710 return 2;
14711 }
14712}
14713
14714// isConsecutiveLSLoc needs to work even if all adds have not yet been
14715// collapsed, and so we need to look through chains of them.
14716 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14717 int64_t& Offset, SelectionDAG &DAG) {
14718 if (DAG.isBaseWithConstantOffset(Loc)) {
14719 Base = Loc.getOperand(0);
14720 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14721
14722 // The base might itself be a base plus an offset, and if so, accumulate
14723 // that as well.
14724 getBaseWithConstantOffset(Base, Base, Offset, DAG);
14725
14726}
14727
14728 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14729 unsigned Bytes, int Dist,
14730 SelectionDAG &DAG) {
14731 if (VT.getSizeInBits() / 8 != Bytes)
14732 return false;
14733
14734 SDValue BaseLoc = Base->getBasePtr();
14735 if (Loc.getOpcode() == ISD::FrameIndex) {
14736 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14737 return false;
14738 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14739 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14740 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14741 int FS = MFI.getObjectSize(FI);
14742 int BFS = MFI.getObjectSize(BFI);
14743 if (FS != BFS || FS != (int)Bytes) return false;
14744 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14745 }
14746
14747 SDValue Base1 = Loc, Base2 = BaseLoc;
14748 int64_t Offset1 = 0, Offset2 = 0;
14749 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14750 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14751 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14752 return true;
14753
14754 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14755 const GlobalValue *GV1 = nullptr;
14756 const GlobalValue *GV2 = nullptr;
14757 Offset1 = 0;
14758 Offset2 = 0;
14759 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14760 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14761 if (isGA1 && isGA2 && GV1 == GV2)
14762 return Offset1 == (Offset2 + Dist*Bytes);
14763 return false;
14764}
14765
14766// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14767// not enforce equality of the chain operands.
14768 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14769 unsigned Bytes, int Dist,
14770 SelectionDAG &DAG) {
14771 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14772 EVT VT = LS->getMemoryVT();
14773 SDValue Loc = LS->getBasePtr();
14774 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14775 }
14776
14777 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14778 EVT VT;
14779 switch (N->getConstantOperandVal(1)) {
14780 default: return false;
14781 case Intrinsic::ppc_altivec_lvx:
14782 case Intrinsic::ppc_altivec_lvxl:
14783 case Intrinsic::ppc_vsx_lxvw4x:
14784 case Intrinsic::ppc_vsx_lxvw4x_be:
14785 VT = MVT::v4i32;
14786 break;
14787 case Intrinsic::ppc_vsx_lxvd2x:
14788 case Intrinsic::ppc_vsx_lxvd2x_be:
14789 VT = MVT::v2f64;
14790 break;
14791 case Intrinsic::ppc_altivec_lvebx:
14792 VT = MVT::i8;
14793 break;
14794 case Intrinsic::ppc_altivec_lvehx:
14795 VT = MVT::i16;
14796 break;
14797 case Intrinsic::ppc_altivec_lvewx:
14798 VT = MVT::i32;
14799 break;
14800 }
14801
14802 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14803 }
14804
14805 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14806 EVT VT;
14807 switch (N->getConstantOperandVal(1)) {
14808 default: return false;
14809 case Intrinsic::ppc_altivec_stvx:
14810 case Intrinsic::ppc_altivec_stvxl:
14811 case Intrinsic::ppc_vsx_stxvw4x:
14812 VT = MVT::v4i32;
14813 break;
14814 case Intrinsic::ppc_vsx_stxvd2x:
14815 VT = MVT::v2f64;
14816 break;
14817 case Intrinsic::ppc_vsx_stxvw4x_be:
14818 VT = MVT::v4i32;
14819 break;
14820 case Intrinsic::ppc_vsx_stxvd2x_be:
14821 VT = MVT::v2f64;
14822 break;
14823 case Intrinsic::ppc_altivec_stvebx:
14824 VT = MVT::i8;
14825 break;
14826 case Intrinsic::ppc_altivec_stvehx:
14827 VT = MVT::i16;
14828 break;
14829 case Intrinsic::ppc_altivec_stvewx:
14830 VT = MVT::i32;
14831 break;
14832 }
14833
14834 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14835 }
14836
14837 return false;
14838}
14839
14840 // Return true if there is a nearby consecutive load to the one provided
14841 // (regardless of alignment). We search up and down the chain, looking through
14842// token factors and other loads (but nothing else). As a result, a true result
14843// indicates that it is safe to create a new consecutive load adjacent to the
14844// load provided.
14845 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14846 SDValue Chain = LD->getChain();
14847 EVT VT = LD->getMemoryVT();
14848
14849 SmallPtrSet<SDNode *, 16> LoadRoots;
14850 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14851 SmallPtrSet<SDNode *, 16> Visited;
14852
14853 // First, search up the chain, branching to follow all token-factor operands.
14854 // If we find a consecutive load, then we're done, otherwise, record all
14855 // nodes just above the top-level loads and token factors.
14856 while (!Queue.empty()) {
14857 SDNode *ChainNext = Queue.pop_back_val();
14858 if (!Visited.insert(ChainNext).second)
14859 continue;
14860
14861 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14862 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14863 return true;
14864
14865 if (!Visited.count(ChainLD->getChain().getNode()))
14866 Queue.push_back(ChainLD->getChain().getNode());
14867 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14868 for (const SDUse &O : ChainNext->ops())
14869 if (!Visited.count(O.getNode()))
14870 Queue.push_back(O.getNode());
14871 } else
14872 LoadRoots.insert(ChainNext);
14873 }
14874
14875 // Second, search down the chain, starting from the top-level nodes recorded
14876 // in the first phase. These top-level nodes are the nodes just above all
14877 // loads and token factors. Starting with their uses, recursively look through
14878 // all loads (just the chain uses) and token factors to find a consecutive
14879 // load.
14880 Visited.clear();
14881 Queue.clear();
14882
14883 for (SDNode *I : LoadRoots) {
14884 Queue.push_back(I);
14885
14886 while (!Queue.empty()) {
14887 SDNode *LoadRoot = Queue.pop_back_val();
14888 if (!Visited.insert(LoadRoot).second)
14889 continue;
14890
14891 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14892 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14893 return true;
14894
14895 for (SDNode *U : LoadRoot->users())
14896 if (((isa<MemSDNode>(U) &&
14897 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14898 U->getOpcode() == ISD::TokenFactor) &&
14899 !Visited.count(U))
14900 Queue.push_back(U);
14901 }
14902 }
14903
14904 return false;
14905}
14906
14907/// This function is called when we have proved that a SETCC node can be replaced
14908 /// by subtraction (and other supporting instructions) so that the result of the
14909 /// comparison is kept in a GPR instead of a CR. This function is purely for
14910/// codegen purposes and has some flags to guide the codegen process.
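/// For example, (setult x, y) on i32 operands with Size = 64 becomes
/// trunc(((zext64 x) - (zext64 y)) >> 63): the 64-bit difference of the
/// zero-extended operands is negative exactly when x <u y, so its sign bit is
/// the comparison result. The Swap and Complement flags derive the other
/// unsigned predicates (ugt, ule, uge) from this same form.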
14911static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14912 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14913 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14914
14915 // Zero extend the operands to the largest legal integer. Originally, they
14916 // must be of a strictly smaller size.
14917 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14918 DAG.getConstant(Size, DL, MVT::i32));
14919 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14920 DAG.getConstant(Size, DL, MVT::i32));
14921
14922 // Swap if needed. Depends on the condition code.
14923 if (Swap)
14924 std::swap(Op0, Op1);
14925
14926 // Subtract extended integers.
14927 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14928
14929 // Move the sign bit to the least significant position and zero out the rest.
14930 // Now the least significant bit carries the result of original comparison.
14931 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14932 DAG.getConstant(Size - 1, DL, MVT::i32));
14933 auto Final = Shifted;
14934
14935 // Complement the result if needed. Based on the condition code.
14936 if (Complement)
14937 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14938 DAG.getConstant(1, DL, MVT::i64));
14939
14940 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14941}
14942
14943SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14944 DAGCombinerInfo &DCI) const {
14945 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14946
14947 SelectionDAG &DAG = DCI.DAG;
14948 SDLoc DL(N);
14949
14950 // The size of the integers being compared has a critical role in the following
14951 // analysis, so we prefer to do this when all types are legal.
14952 if (!DCI.isAfterLegalizeDAG())
14953 return SDValue();
14954
14955 // If all users of SETCC extend its value to a legal integer type,
14956 // then we replace SETCC with a subtraction.
14957 for (const SDNode *U : N->users())
14958 if (U->getOpcode() != ISD::ZERO_EXTEND)
14959 return SDValue();
14960
14961 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14962 auto OpSize = N->getOperand(0).getValueSizeInBits();
14963
14964 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14965
14966 if (OpSize < Size) {
14967 switch (CC) {
14968 default: break;
14969 case ISD::SETULT:
14970 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14971 case ISD::SETULE:
14972 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14973 case ISD::SETUGT:
14974 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14975 case ISD::SETUGE:
14976 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14977 }
14978 }
14979
14980 return SDValue();
14981}
14982
14983SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14984 DAGCombinerInfo &DCI) const {
14985 SelectionDAG &DAG = DCI.DAG;
14986 SDLoc dl(N);
14987
14988 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14989 // If we're tracking CR bits, we need to be careful that we don't have:
14990 // trunc(binary-ops(zext(x), zext(y)))
14991 // or
14992 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
14993 // such that we're unnecessarily moving things into GPRs when it would be
14994 // better to keep them in CR bits.
14995
14996 // Note that trunc here can be an actual i1 trunc, or can be the effective
14997 // truncation that comes from a setcc or select_cc.
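// For example, (trunc (and (zext i1 %a), (zext i1 %b))) would otherwise move
// %a and %b into GPRs just to AND them; the rewrite below promotes the
// cluster so the AND is performed directly on the i1 (CR bit) values.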
14998 if (N->getOpcode() == ISD::TRUNCATE &&
14999 N->getValueType(0) != MVT::i1)
15000 return SDValue();
15001
15002 if (N->getOperand(0).getValueType() != MVT::i32 &&
15003 N->getOperand(0).getValueType() != MVT::i64)
15004 return SDValue();
15005
15006 if (N->getOpcode() == ISD::SETCC ||
15007 N->getOpcode() == ISD::SELECT_CC) {
15008 // If we're looking at a comparison, then we need to make sure that the
15009 // high bits (all except for the first) don't affect the result.
15010 ISD::CondCode CC =
15011 cast<CondCodeSDNode>(N->getOperand(
15012 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15013 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15014
15015 if (ISD::isSignedIntSetCC(CC)) {
15016 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15017 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15018 return SDValue();
15019 } else if (ISD::isUnsignedIntSetCC(CC)) {
15020 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15021 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15022 !DAG.MaskedValueIsZero(N->getOperand(1),
15023 APInt::getHighBitsSet(OpBits, OpBits-1)))
15024 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15025 : SDValue());
15026 } else {
15027 // This is neither a signed nor an unsigned comparison, just make sure
15028 // that the high bits are equal.
15029 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15030 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15031
15032 // We don't really care about what is known about the first bit (if
15033 // anything), so pretend that it is known zero for both to ensure they can
15034 // be compared as constants.
15035 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15036 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15037
15038 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15039 Op1Known.getConstant() != Op2Known.getConstant())
15040 return SDValue();
15041 }
15042 }
15043
15044 // We now know that the higher-order bits are irrelevant, we just need to
15045 // make sure that all of the intermediate operations are bit operations, and
15046 // all inputs are extensions.
15047 if (N->getOperand(0).getOpcode() != ISD::AND &&
15048 N->getOperand(0).getOpcode() != ISD::OR &&
15049 N->getOperand(0).getOpcode() != ISD::XOR &&
15050 N->getOperand(0).getOpcode() != ISD::SELECT &&
15051 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15052 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15053 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15054 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15055 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15056 return SDValue();
15057
15058 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15059 N->getOperand(1).getOpcode() != ISD::AND &&
15060 N->getOperand(1).getOpcode() != ISD::OR &&
15061 N->getOperand(1).getOpcode() != ISD::XOR &&
15062 N->getOperand(1).getOpcode() != ISD::SELECT &&
15063 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15064 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15065 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15066 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15067 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15068 return SDValue();
15069
15070 SmallVector<SDValue, 4> Inputs;
15071 SmallVector<SDValue, 8> BinOps, PromOps;
15072 SmallPtrSet<SDNode *, 16> Visited;
15073
15074 for (unsigned i = 0; i < 2; ++i) {
15075 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15076 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15077 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15078 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15079 isa<ConstantSDNode>(N->getOperand(i)))
15080 Inputs.push_back(N->getOperand(i));
15081 else
15082 BinOps.push_back(N->getOperand(i));
15083
15084 if (N->getOpcode() == ISD::TRUNCATE)
15085 break;
15086 }
15087
15088 // Visit all inputs, collect all binary operations (and, or, xor and
15089 // select) that are all fed by extensions.
15090 while (!BinOps.empty()) {
15091 SDValue BinOp = BinOps.pop_back_val();
15092
15093 if (!Visited.insert(BinOp.getNode()).second)
15094 continue;
15095
15096 PromOps.push_back(BinOp);
15097
15098 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15099 // The condition of the select is not promoted.
15100 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15101 continue;
15102 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15103 continue;
15104
15105 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15106 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15107 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15108 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15109 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15110 Inputs.push_back(BinOp.getOperand(i));
15111 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15112 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15113 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15114 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15115 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15116 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15117 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15118 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15119 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15120 BinOps.push_back(BinOp.getOperand(i));
15121 } else {
15122 // We have an input that is not an extension or another binary
15123 // operation; we'll abort this transformation.
15124 return SDValue();
15125 }
15126 }
15127 }
15128
15129 // Make sure that this is a self-contained cluster of operations (which
15130 // is not quite the same thing as saying that everything has only one
15131 // use).
15132 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15133 if (isa<ConstantSDNode>(Inputs[i]))
15134 continue;
15135
15136 for (const SDNode *User : Inputs[i].getNode()->users()) {
15137 if (User != N && !Visited.count(User))
15138 return SDValue();
15139
15140 // Make sure that we're not going to promote the non-output-value
15141 // operand(s) or SELECT or SELECT_CC.
15142 // FIXME: Although we could sometimes handle this, and it does occur in
15143 // practice that one of the condition inputs to the select is also one of
15144 // the outputs, we currently can't deal with this.
15145 if (User->getOpcode() == ISD::SELECT) {
15146 if (User->getOperand(0) == Inputs[i])
15147 return SDValue();
15148 } else if (User->getOpcode() == ISD::SELECT_CC) {
15149 if (User->getOperand(0) == Inputs[i] ||
15150 User->getOperand(1) == Inputs[i])
15151 return SDValue();
15152 }
15153 }
15154 }
15155
15156 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15157 for (const SDNode *User : PromOps[i].getNode()->users()) {
15158 if (User != N && !Visited.count(User))
15159 return SDValue();
15160
15161 // Make sure that we're not going to promote the non-output-value
15162 // operand(s) or SELECT or SELECT_CC.
15163 // FIXME: Although we could sometimes handle this, and it does occur in
15164 // practice that one of the condition inputs to the select is also one of
15165 // the outputs, we currently can't deal with this.
15166 if (User->getOpcode() == ISD::SELECT) {
15167 if (User->getOperand(0) == PromOps[i])
15168 return SDValue();
15169 } else if (User->getOpcode() == ISD::SELECT_CC) {
15170 if (User->getOperand(0) == PromOps[i] ||
15171 User->getOperand(1) == PromOps[i])
15172 return SDValue();
15173 }
15174 }
15175 }
15176
15177 // Replace all inputs with the extension operand.
15178 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15179 // Constants may have users outside the cluster of to-be-promoted nodes,
15180 // and so we need to replace those as we do the promotions.
15181 if (isa<ConstantSDNode>(Inputs[i]))
15182 continue;
15183 else
15184 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15185 }
15186
15187 std::list<HandleSDNode> PromOpHandles;
15188 for (auto &PromOp : PromOps)
15189 PromOpHandles.emplace_back(PromOp);
15190
15191 // Replace all operations (these are all the same, but have a different
15192 // (i1) return type). DAG.getNode will validate that the types of
15193 // a binary operator match, so go through the list in reverse so that
15194 // we've likely promoted both operands first. Any intermediate truncations or
15195 // extensions disappear.
15196 while (!PromOpHandles.empty()) {
15197 SDValue PromOp = PromOpHandles.back().getValue();
15198 PromOpHandles.pop_back();
15199
15200 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15201 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15202 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15203 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15204 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15205 PromOp.getOperand(0).getValueType() != MVT::i1) {
15206 // The operand is not yet ready (see comment below).
15207 PromOpHandles.emplace_front(PromOp);
15208 continue;
15209 }
15210
15211 SDValue RepValue = PromOp.getOperand(0);
15212 if (isa<ConstantSDNode>(RepValue))
15213 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15214
15215 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15216 continue;
15217 }
15218
15219 unsigned C;
15220 switch (PromOp.getOpcode()) {
15221 default: C = 0; break;
15222 case ISD::SELECT: C = 1; break;
15223 case ISD::SELECT_CC: C = 2; break;
15224 }
15225
15226 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15227 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15228 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15229 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15230 // The to-be-promoted operands of this node have not yet been
15231 // promoted (this should be rare because we're going through the
15232 // list backward, but if one of the operands has several users in
15233 // this cluster of to-be-promoted nodes, it is possible).
15234 PromOpHandles.emplace_front(PromOp);
15235 continue;
15236 }
15237
15238 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15239
15240 // If there are any constant inputs, make sure they're replaced now.
15241 for (unsigned i = 0; i < 2; ++i)
15242 if (isa<ConstantSDNode>(Ops[C+i]))
15243 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15244
15245 DAG.ReplaceAllUsesOfValueWith(PromOp,
15246 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15247 }
15248
15249 // Now we're left with the initial truncation itself.
15250 if (N->getOpcode() == ISD::TRUNCATE)
15251 return N->getOperand(0);
15252
15253 // Otherwise, this is a comparison. The operands to be compared have just
15254 // changed type (to i1), but everything else is the same.
15255 return SDValue(N, 0);
15256}
15257
15258SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15259 DAGCombinerInfo &DCI) const {
15260 SelectionDAG &DAG = DCI.DAG;
15261 SDLoc dl(N);
15262
15263 // If we're tracking CR bits, we need to be careful that we don't have:
15264 // zext(binary-ops(trunc(x), trunc(y)))
15265 // or
15266 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
15267 // such that we're unnecessarily moving things into CR bits that can more
15268 // efficiently stay in GPRs. Note that if we're not certain that the high
15269 // bits are set as required by the final extension, we still may need to do
15270 // some masking to get the proper behavior.
15271
15272 // This same functionality is important on PPC64 when dealing with
15273 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15274 // the return values of functions. Because it is so similar, it is handled
15275 // here as well.
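// For example, (zext (or (trunc i64 %a), (trunc i64 %b))) can be rewritten to
// operate on %a and %b directly, with at most a final AND mask (for zext) or
// shl/sra pair (for sext) if the high bits are not already known to be correct.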
15276
15277 if (N->getValueType(0) != MVT::i32 &&
15278 N->getValueType(0) != MVT::i64)
15279 return SDValue();
15280
15281 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15282 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15283 return SDValue();
15284
15285 if (N->getOperand(0).getOpcode() != ISD::AND &&
15286 N->getOperand(0).getOpcode() != ISD::OR &&
15287 N->getOperand(0).getOpcode() != ISD::XOR &&
15288 N->getOperand(0).getOpcode() != ISD::SELECT &&
15289 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15290 return SDValue();
15291
15292 SmallVector<SDValue, 4> Inputs;
15293 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15294 SmallPtrSet<SDNode *, 16> Visited;
15295
15296 // Visit all inputs, collect all binary operations (and, or, xor and
15297 // select) that are all fed by truncations.
15298 while (!BinOps.empty()) {
15299 SDValue BinOp = BinOps.pop_back_val();
15300
15301 if (!Visited.insert(BinOp.getNode()).second)
15302 continue;
15303
15304 PromOps.push_back(BinOp);
15305
15306 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15307 // The condition of the select is not promoted.
15308 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15309 continue;
15310 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15311 continue;
15312
15313 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15314 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15315 Inputs.push_back(BinOp.getOperand(i));
15316 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15317 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15318 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15319 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15320 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15321 BinOps.push_back(BinOp.getOperand(i));
15322 } else {
15323 // We have an input that is not a truncation or another binary
15324 // operation; we'll abort this transformation.
15325 return SDValue();
15326 }
15327 }
15328 }
15329
15330 // The operands of a select that must be truncated when the select is
15331 // promoted because the operand is actually part of the to-be-promoted set.
15332 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15333
15334 // Make sure that this is a self-contained cluster of operations (which
15335 // is not quite the same thing as saying that everything has only one
15336 // use).
15337 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15338 if (isa<ConstantSDNode>(Inputs[i]))
15339 continue;
15340
15341 for (SDNode *User : Inputs[i].getNode()->users()) {
15342 if (User != N && !Visited.count(User))
15343 return SDValue();
15344
15345 // If we're going to promote the non-output-value operand(s) or SELECT or
15346 // SELECT_CC, record them for truncation.
15347 if (User->getOpcode() == ISD::SELECT) {
15348 if (User->getOperand(0) == Inputs[i])
15349 SelectTruncOp[0].insert(std::make_pair(User,
15350 User->getOperand(0).getValueType()));
15351 } else if (User->getOpcode() == ISD::SELECT_CC) {
15352 if (User->getOperand(0) == Inputs[i])
15353 SelectTruncOp[0].insert(std::make_pair(User,
15354 User->getOperand(0).getValueType()));
15355 if (User->getOperand(1) == Inputs[i])
15356 SelectTruncOp[1].insert(std::make_pair(User,
15357 User->getOperand(1).getValueType()));
15358 }
15359 }
15360 }
15361
15362 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15363 for (SDNode *User : PromOps[i].getNode()->users()) {
15364 if (User != N && !Visited.count(User))
15365 return SDValue();
15366
15367 // If we're going to promote the non-output-value operand(s) or SELECT or
15368 // SELECT_CC, record them for truncation.
15369 if (User->getOpcode() == ISD::SELECT) {
15370 if (User->getOperand(0) == PromOps[i])
15371 SelectTruncOp[0].insert(std::make_pair(User,
15372 User->getOperand(0).getValueType()));
15373 } else if (User->getOpcode() == ISD::SELECT_CC) {
15374 if (User->getOperand(0) == PromOps[i])
15375 SelectTruncOp[0].insert(std::make_pair(User,
15376 User->getOperand(0).getValueType()));
15377 if (User->getOperand(1) == PromOps[i])
15378 SelectTruncOp[1].insert(std::make_pair(User,
15379 User->getOperand(1).getValueType()));
15380 }
15381 }
15382 }
15383
15384 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15385 bool ReallyNeedsExt = false;
15386 if (N->getOpcode() != ISD::ANY_EXTEND) {
15387 // If all of the inputs are not already sign/zero extended, then
15388 // we'll still need to do that at the end.
15389 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15390 if (isa<ConstantSDNode>(Inputs[i]))
15391 continue;
15392
15393 unsigned OpBits =
15394 Inputs[i].getOperand(0).getValueSizeInBits();
15395 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15396
15397 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15398 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15399 APInt::getHighBitsSet(OpBits,
15400 OpBits-PromBits))) ||
15401 (N->getOpcode() == ISD::SIGN_EXTEND &&
15402 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15403 (OpBits-(PromBits-1)))) {
15404 ReallyNeedsExt = true;
15405 break;
15406 }
15407 }
15408 }
15409
15410 // Replace all inputs, either with the truncation operand, or a
15411 // truncation or extension to the final output type.
15412 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15413 // Constant inputs need to be replaced with the to-be-promoted nodes that
15414 // use them because they might have users outside of the cluster of
15415 // promoted nodes.
15416 if (isa<ConstantSDNode>(Inputs[i]))
15417 continue;
15418
15419 SDValue InSrc = Inputs[i].getOperand(0);
15420 if (Inputs[i].getValueType() == N->getValueType(0))
15421 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15422 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15423 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15424 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15425 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15426 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15427 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15428 else
15429 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15430 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15431 }
15432
15433 std::list<HandleSDNode> PromOpHandles;
15434 for (auto &PromOp : PromOps)
15435 PromOpHandles.emplace_back(PromOp);
15436
15437 // Replace all operations (these are all the same, but have a different
15438 // (promoted) return type). DAG.getNode will validate that the types of
15439 // a binary operator match, so go through the list in reverse so that
15440 // we've likely promoted both operands first.
15441 while (!PromOpHandles.empty()) {
15442 SDValue PromOp = PromOpHandles.back().getValue();
15443 PromOpHandles.pop_back();
15444
15445 unsigned C;
15446 switch (PromOp.getOpcode()) {
15447 default: C = 0; break;
15448 case ISD::SELECT: C = 1; break;
15449 case ISD::SELECT_CC: C = 2; break;
15450 }
15451
15452 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15453 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15454 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15455 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15456 // The to-be-promoted operands of this node have not yet been
15457 // promoted (this should be rare because we're going through the
15458 // list backward, but if one of the operands has several users in
15459 // this cluster of to-be-promoted nodes, it is possible).
15460 PromOpHandles.emplace_front(PromOp);
15461 continue;
15462 }
15463
15464 // For SELECT and SELECT_CC nodes, we do a similar check for any
15465 // to-be-promoted comparison inputs.
15466 if (PromOp.getOpcode() == ISD::SELECT ||
15467 PromOp.getOpcode() == ISD::SELECT_CC) {
15468 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15469 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15470 (SelectTruncOp[1].count(PromOp.getNode()) &&
15471 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15472 PromOpHandles.emplace_front(PromOp);
15473 continue;
15474 }
15475 }
15476
15477 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15478
15479 // If this node has constant inputs, then they'll need to be promoted here.
15480 for (unsigned i = 0; i < 2; ++i) {
15481 if (!isa<ConstantSDNode>(Ops[C+i]))
15482 continue;
15483 if (Ops[C+i].getValueType() == N->getValueType(0))
15484 continue;
15485
15486 if (N->getOpcode() == ISD::SIGN_EXTEND)
15487 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15488 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15489 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15490 else
15491 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15492 }
15493
15494 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15495 // truncate them again to the original value type.
15496 if (PromOp.getOpcode() == ISD::SELECT ||
15497 PromOp.getOpcode() == ISD::SELECT_CC) {
15498 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15499 if (SI0 != SelectTruncOp[0].end())
15500 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15501 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15502 if (SI1 != SelectTruncOp[1].end())
15503 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15504 }
15505
15506 DAG.ReplaceAllUsesOfValueWith(PromOp,
15507 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15508 }
15509
15510 // Now we're left with the initial extension itself.
15511 if (!ReallyNeedsExt)
15512 return N->getOperand(0);
15513
15514 // To zero extend, just mask off everything except for the first bit (in the
15515 // i1 case).
15516 if (N->getOpcode() == ISD::ZERO_EXTEND)
15517 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15518 DAG.getConstant(APInt::getLowBitsSet(
15519 N->getValueSizeInBits(0), PromBits),
15520 dl, N->getValueType(0)));
15521
15522 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15523 "Invalid extension type");
15524 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15525 SDValue ShiftCst =
15526 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15527 return DAG.getNode(
15528 ISD::SRA, dl, N->getValueType(0),
15529 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15530 ShiftCst);
15531}
15532
15533SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15534 DAGCombinerInfo &DCI) const {
15535 assert(N->getOpcode() == ISD::SETCC &&
15536 "Should be called with a SETCC node");
15537
15538 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15539 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15540 SDValue LHS = N->getOperand(0);
15541 SDValue RHS = N->getOperand(1);
15542
15543 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15544 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15545 LHS.hasOneUse())
15546 std::swap(LHS, RHS);
15547
15548 // x == 0-y --> x+y == 0
15549 // x != 0-y --> x+y != 0
15550 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15551 RHS.hasOneUse()) {
15552 SDLoc DL(N);
15553 SelectionDAG &DAG = DCI.DAG;
15554 EVT VT = N->getValueType(0);
15555 EVT OpVT = LHS.getValueType();
15556 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15557 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15558 }
15559 }
15560
15561 return DAGCombineTruncBoolExt(N, DCI);
15562}
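// For illustration: the combine above turns
//   (setcc %x, (sub 0, %y), seteq)
// into
//   (setcc (add %x, %y), 0, seteq)
// so the comparison is done against zero without materializing 0 - %y.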
15563
15564// Is this an extending load from an f32 to an f64?
15565static bool isFPExtLoad(SDValue Op) {
15566 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15567 return LD->getExtensionType() == ISD::EXTLOAD &&
15568 Op.getValueType() == MVT::f64;
15569 return false;
15570}
15571
15572/// Reduces the number of fp-to-int conversion when building a vector.
15573///
15574/// If this vector is built out of floating to integer conversions,
15575/// transform it to a vector built out of floating point values followed by a
15576/// single floating to integer conversion of the vector.
15577/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15578/// becomes (fptosi (build_vector ($A, $B, ...)))
15579SDValue PPCTargetLowering::
15580combineElementTruncationToVectorTruncation(SDNode *N,
15581 DAGCombinerInfo &DCI) const {
15582 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15583 "Should be called with a BUILD_VECTOR node");
15584
15585 SelectionDAG &DAG = DCI.DAG;
15586 SDLoc dl(N);
15587
15588 SDValue FirstInput = N->getOperand(0);
15589 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15590 "The input operand must be an fp-to-int conversion.");
15591
15592 // This combine happens after legalization so the fp_to_[su]i nodes are
15593 // already converted to PPCISD nodes.
15594 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15595 if (FirstConversion == PPCISD::FCTIDZ ||
15596 FirstConversion == PPCISD::FCTIDUZ ||
15597 FirstConversion == PPCISD::FCTIWZ ||
15598 FirstConversion == PPCISD::FCTIWUZ) {
15599 bool IsSplat = true;
15600 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15601 FirstConversion == PPCISD::FCTIWUZ;
15602 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15603     SmallVector<SDValue, 4> Ops;
15604     EVT TargetVT = N->getValueType(0);
15605 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15606 SDValue NextOp = N->getOperand(i);
15607 if (NextOp.getOpcode() != PPCISD::MFVSR)
15608 return SDValue();
15609 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15610 if (NextConversion != FirstConversion)
15611 return SDValue();
15612 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15613 // This is not valid if the input was originally double precision. It is
15614 // also not profitable to do unless this is an extending load in which
15615 // case doing this combine will allow us to combine consecutive loads.
15616 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15617 return SDValue();
15618 if (N->getOperand(i) != FirstInput)
15619 IsSplat = false;
15620 }
15621
15622 // If this is a splat, we leave it as-is since there will be only a single
15623 // fp-to-int conversion followed by a splat of the integer. This is better
15624 // for 32-bit and smaller ints and neutral for 64-bit ints.
15625 if (IsSplat)
15626 return SDValue();
15627
15628 // Now that we know we have the right type of node, get its operands
15629 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15630 SDValue In = N->getOperand(i).getOperand(0);
15631 if (Is32Bit) {
15632 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15633 // here, we know that all inputs are extending loads so this is safe).
15634 if (In.isUndef())
15635 Ops.push_back(DAG.getUNDEF(SrcVT));
15636 else {
15637 SDValue Trunc =
15638 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15639 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15640 Ops.push_back(Trunc);
15641 }
15642 } else
15643 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15644 }
15645
15646 unsigned Opcode;
15647 if (FirstConversion == PPCISD::FCTIDZ ||
15648 FirstConversion == PPCISD::FCTIWZ)
15649 Opcode = ISD::FP_TO_SINT;
15650 else
15651 Opcode = ISD::FP_TO_UINT;
15652
15653 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15654 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15655 return DAG.getNode(Opcode, dl, TargetVT, BV);
15656 }
15657 return SDValue();
15658}
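// For illustration (hypothetical types): with two f64 inputs,
//   (v2i64 build_vector (mfvsr (fctidz $A)), (mfvsr (fctidz $B)))
// becomes
//   (fp_to_sint (v2f64 build_vector $A, $B))
// so a single vector conversion replaces the per-element scalar conversions.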
15659
15660/// Reduce the number of loads when building a vector.
15661///
15662/// Building a vector out of multiple loads can be converted to a load
15663/// of the vector type if the loads are consecutive. If the loads are
15664/// consecutive but in descending order, a shuffle is added at the end
15665/// to reorder the vector.
15666 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15667   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15668 "Should be called with a BUILD_VECTOR node");
15669
15670 SDLoc dl(N);
15671
15672   // Return early for non-byte-sized types, as they can't be consecutive.
15673 if (!N->getValueType(0).getVectorElementType().isByteSized())
15674 return SDValue();
15675
15676 bool InputsAreConsecutiveLoads = true;
15677 bool InputsAreReverseConsecutive = true;
15678 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15679 SDValue FirstInput = N->getOperand(0);
15680 bool IsRoundOfExtLoad = false;
15681 LoadSDNode *FirstLoad = nullptr;
15682
15683 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15684 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15685 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15686 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15687 }
15688 // Not a build vector of (possibly fp_rounded) loads.
15689 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15690 N->getNumOperands() == 1)
15691 return SDValue();
15692
15693 if (!IsRoundOfExtLoad)
15694 FirstLoad = cast<LoadSDNode>(FirstInput);
15695
15696   SmallVector<LoadSDNode *, 4> InputLoads;
15697   InputLoads.push_back(FirstLoad);
15698 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15699 // If any inputs are fp_round(extload), they all must be.
15700 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15701 return SDValue();
15702
15703 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15704 N->getOperand(i);
15705 if (NextInput.getOpcode() != ISD::LOAD)
15706 return SDValue();
15707
15708 SDValue PreviousInput =
15709 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15710 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15711 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15712
15713 // If any inputs are fp_round(extload), they all must be.
15714 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15715 return SDValue();
15716
15717 // We only care about regular loads. The PPC-specific load intrinsics
15718 // will not lead to a merge opportunity.
15719 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15720 InputsAreConsecutiveLoads = false;
15721 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15722 InputsAreReverseConsecutive = false;
15723
15724 // Exit early if the loads are neither consecutive nor reverse consecutive.
15725 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15726 return SDValue();
15727 InputLoads.push_back(LD2);
15728 }
15729
15730 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15731 "The loads cannot be both consecutive and reverse consecutive.");
15732
15733 SDValue WideLoad;
15734 SDValue ReturnSDVal;
15735 if (InputsAreConsecutiveLoads) {
15736 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15737 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15738 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15739 FirstLoad->getAlign());
15740 ReturnSDVal = WideLoad;
15741 } else if (InputsAreReverseConsecutive) {
15742 LoadSDNode *LastLoad = InputLoads.back();
15743 assert(LastLoad && "Input needs to be a LoadSDNode.");
15744 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15745 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15746 LastLoad->getAlign());
15747     SmallVector<int, 16> Ops;
15748     for (int i = N->getNumOperands() - 1; i >= 0; i--)
15749 Ops.push_back(i);
15750
15751 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
15752 DAG.getUNDEF(N->getValueType(0)), Ops);
15753 } else
15754 return SDValue();
15755
15756 for (auto *LD : InputLoads)
15757 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
15758 return ReturnSDVal;
15759}
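// For illustration (hypothetical addresses): a v4i32 built from four i32 loads
// at A, A+4, A+8 and A+12 becomes one v4i32 load at A. If the elements are
// instead loaded from A+12, A+8, A+4 and A, a single wide load at A is built
// and followed by a <3,2,1,0> vector_shuffle to restore the requested order.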
15760
15761// This function adds the required vector_shuffle needed to get
15762// the elements of the vector extract in the correct position
15763// as specified by the CorrectElems encoding.
15764 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15765                                       SDValue Input, uint64_t Elems,
15766 uint64_t CorrectElems) {
15767 SDLoc dl(N);
15768
15769 unsigned NumElems = Input.getValueType().getVectorNumElements();
15770 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15771
15772 // Knowing the element indices being extracted from the original
15773 // vector and the order in which they're being inserted, just put
15774 // them at element indices required for the instruction.
15775 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15776 if (DAG.getDataLayout().isLittleEndian())
15777 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15778 else
15779 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15780 CorrectElems = CorrectElems >> 8;
15781 Elems = Elems >> 8;
15782 }
15783
15784 SDValue Shuffle =
15785 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
15786 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
15787
15788 EVT VT = N->getValueType(0);
15789 SDValue Conv = DAG.getBitcast(VT, Shuffle);
15790
15791 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
15794 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
15795 DAG.getValueType(ExtVT));
15796}
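// For illustration (hypothetical indices): on little endian, a byte-to-word
// extend expects the source bytes at indices 0, 4, 8 and 12. If the
// build_vector instead extracts bytes 1, 5, 9 and 13, the shuffle created
// above places byte 1 in lane 0, byte 5 in lane 4, and so on, so the
// SIGN_EXTEND_INREG can still be matched to the vector extend instruction.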
15797
15798// Look for build vector patterns where input operands come from sign
15799// extended vector_extract elements of specific indices. If the correct indices
15800// aren't used, add a vector shuffle to fix up the indices and create
15801// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
15802// during instruction selection.
15803 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15804   // This array encodes the indices that the vector sign extend instructions
15805 // extract from when extending from one type to another for both BE and LE.
15806   // The right nibble of each byte corresponds to the LE indices,
15807   // and the left nibble of each byte corresponds to the BE indices.
15808 // For example: 0x3074B8FC byte->word
15809 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15810 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15811 // For example: 0x000070F8 byte->double word
15812 // For LE: the allowed indices are: 0x0,0x8
15813 // For BE: the allowed indices are: 0x7,0xF
15814 uint64_t TargetElems[] = {
15815 0x3074B8FC, // b->w
15816 0x000070F8, // b->d
15817 0x10325476, // h->w
15818 0x00003074, // h->d
15819 0x00001032, // w->d
15820 };
15821
15822 uint64_t Elems = 0;
15823 int Index;
15824 SDValue Input;
15825
15826 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15827 if (!Op)
15828 return false;
15829 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15830 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15831 return false;
15832
15833 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15834 // of the right width.
15835 SDValue Extract = Op.getOperand(0);
15836 if (Extract.getOpcode() == ISD::ANY_EXTEND)
15837 Extract = Extract.getOperand(0);
15838 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15839 return false;
15840
15841 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
15842 if (!ExtOp)
15843 return false;
15844
15845 Index = ExtOp->getZExtValue();
15846 if (Input && Input != Extract.getOperand(0))
15847 return false;
15848
15849 if (!Input)
15850 Input = Extract.getOperand(0);
15851
15852 Elems = Elems << 8;
15853 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15854 Elems |= Index;
15855
15856 return true;
15857 };
15858
15859   // If the build vector operands aren't sign-extended vector extracts
15860   // of the same input vector, then return.
15861 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15862 if (!isSExtOfVecExtract(N->getOperand(i))) {
15863 return SDValue();
15864 }
15865 }
15866
15867 // If the vector extract indices are not correct, add the appropriate
15868 // vector_shuffle.
15869 int TgtElemArrayIdx;
15870 int InputSize = Input.getValueType().getScalarSizeInBits();
15871 int OutputSize = N->getValueType(0).getScalarSizeInBits();
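  // The sum of the input and output scalar sizes uniquely identifies the
  // conversion: 8+32=40 (b->w), 8+64=72 (b->d), 16+32=48 (h->w),
  // 16+64=80 (h->d) and 32+64=96 (w->d), matching the rows of TargetElems.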
15872 if (InputSize + OutputSize == 40)
15873 TgtElemArrayIdx = 0;
15874 else if (InputSize + OutputSize == 72)
15875 TgtElemArrayIdx = 1;
15876 else if (InputSize + OutputSize == 48)
15877 TgtElemArrayIdx = 2;
15878 else if (InputSize + OutputSize == 80)
15879 TgtElemArrayIdx = 3;
15880 else if (InputSize + OutputSize == 96)
15881 TgtElemArrayIdx = 4;
15882 else
15883 return SDValue();
15884
15885 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15886 CorrectElems = DAG.getDataLayout().isLittleEndian()
15887 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15888 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15889 if (Elems != CorrectElems) {
15890 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15891 }
15892
15893 // Regular lowering will catch cases where a shuffle is not needed.
15894 return SDValue();
15895}
15896
15897// Look for the pattern of a load from a narrow width to i128, feeding
15898// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15899// (LXVRZX). This node represents a zero extending load that will be matched
15900// to the Load VSX Vector Rightmost instructions.
15901 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15902   SDLoc DL(N);
15903
15904 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15905 if (N->getValueType(0) != MVT::v1i128)
15906 return SDValue();
15907
15908 SDValue Operand = N->getOperand(0);
15909 // Proceed with the transformation if the operand to the BUILD_VECTOR
15910 // is a load instruction.
15911 if (Operand.getOpcode() != ISD::LOAD)
15912 return SDValue();
15913
15914 auto *LD = cast<LoadSDNode>(Operand);
15915 EVT MemoryType = LD->getMemoryVT();
15916
15917   // This transformation is only valid if we are loading either a byte,
15918 // halfword, word, or doubleword.
15919 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15920 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15921
15922 // Ensure that the load from the narrow width is being zero extended to i128.
15923 if (!ValidLDType ||
15924 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15925 LD->getExtensionType() != ISD::EXTLOAD))
15926 return SDValue();
15927
15928 SDValue LoadOps[] = {
15929 LD->getChain(), LD->getBasePtr(),
15930 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15931
15932   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15933                                  DAG.getVTList(MVT::v1i128, MVT::Other),
15934 LoadOps, MemoryType, LD->getMemOperand());
15935}
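// For illustration (hypothetical width): (v1i128 build_vector (zextload i32))
// becomes an LXVRZX memory intrinsic node, i.e. a single zero-extending load
// into the vector register rather than a scalar load plus insert.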
15936
15937SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15938 DAGCombinerInfo &DCI) const {
15939 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15940 "Should be called with a BUILD_VECTOR node");
15941
15942 SelectionDAG &DAG = DCI.DAG;
15943 SDLoc dl(N);
15944
15945 if (!Subtarget.hasVSX())
15946 return SDValue();
15947
15948 // The target independent DAG combiner will leave a build_vector of
15949 // float-to-int conversions intact. We can generate MUCH better code for
15950 // a float-to-int conversion of a vector of floats.
15951 SDValue FirstInput = N->getOperand(0);
15952 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15953 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15954 if (Reduced)
15955 return Reduced;
15956 }
15957
15958 // If we're building a vector out of consecutive loads, just load that
15959 // vector type.
15960 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15961 if (Reduced)
15962 return Reduced;
15963
15964 // If we're building a vector out of extended elements from another vector
15965 // we have P9 vector integer extend instructions. The code assumes legal
15966 // input types (i.e. it can't handle things like v4i16) so do not run before
15967 // legalization.
15968 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15969 Reduced = combineBVOfVecSExt(N, DAG);
15970 if (Reduced)
15971 return Reduced;
15972 }
15973
15974 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15975 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15976 // is a load from <valid narrow width> to i128.
15977 if (Subtarget.isISA3_1()) {
15978 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15979 if (BVOfZLoad)
15980 return BVOfZLoad;
15981 }
15982
15983 if (N->getValueType(0) != MVT::v2f64)
15984 return SDValue();
15985
15986 // Looking for:
15987 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15988 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15989 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15990 return SDValue();
15991 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15992 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15993 return SDValue();
15994 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15995 return SDValue();
15996
15997 SDValue Ext1 = FirstInput.getOperand(0);
15998 SDValue Ext2 = N->getOperand(1).getOperand(0);
15999 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16000      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16001     return SDValue();
16002
16003 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16004 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16005 if (!Ext1Op || !Ext2Op)
16006 return SDValue();
16007 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16008 Ext1.getOperand(0) != Ext2.getOperand(0))
16009 return SDValue();
16010
16011 int FirstElem = Ext1Op->getZExtValue();
16012 int SecondElem = Ext2Op->getZExtValue();
16013 int SubvecIdx;
16014 if (FirstElem == 0 && SecondElem == 1)
16015 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16016 else if (FirstElem == 2 && SecondElem == 3)
16017 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16018 else
16019 return SDValue();
16020
16021 SDValue SrcVec = Ext1.getOperand(0);
16022 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16023     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16024   return DAG.getNode(NodeType, dl, MVT::v2f64,
16025 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16026}
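// For illustration (hypothetical indices, little endian): a v2f64 built as
//   (build_vector (sint_to_fp (extractelt %v4i32, 0)),
//                 (sint_to_fp (extractelt %v4i32, 1)))
// is replaced by one vector int-to-fp node that converts the selected half of
// %v4i32 directly, avoiding two scalar conversions.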
16027
16028SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16029 DAGCombinerInfo &DCI) const {
16030 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16031 N->getOpcode() == ISD::UINT_TO_FP) &&
16032 "Need an int -> FP conversion node here");
16033
16034 if (useSoftFloat() || !Subtarget.has64BitSupport())
16035 return SDValue();
16036
16037 SelectionDAG &DAG = DCI.DAG;
16038 SDLoc dl(N);
16039 SDValue Op(N, 0);
16040
16041 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16042 // from the hardware.
16043 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16044 return SDValue();
16045 if (!Op.getOperand(0).getValueType().isSimple())
16046 return SDValue();
16047 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16048 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16049 return SDValue();
16050
16051 SDValue FirstOperand(Op.getOperand(0));
16052 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16053 (FirstOperand.getValueType() == MVT::i8 ||
16054 FirstOperand.getValueType() == MVT::i16);
16055 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16056 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16057 bool DstDouble = Op.getValueType() == MVT::f64;
16058 unsigned ConvOp = Signed ?
16059 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16060 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16061 SDValue WidthConst =
16062 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16063 dl, false);
16064 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16065 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16066     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16067                                          DAG.getVTList(MVT::f64, MVT::Other),
16068 Ops, MVT::i8, LDN->getMemOperand());
16069 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16070
16071 // For signed conversion, we need to sign-extend the value in the VSR
16072 if (Signed) {
16073 SDValue ExtOps[] = { Ld, WidthConst };
16074 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16075 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16076 } else
16077 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16078 }
16079
16080
16081 // For i32 intermediate values, unfortunately, the conversion functions
16082   // leave the upper 32 bits of the value undefined. Within the set of
16083 // scalar instructions, we have no method for zero- or sign-extending the
16084 // value. Thus, we cannot handle i32 intermediate values here.
16085 if (Op.getOperand(0).getValueType() == MVT::i32)
16086 return SDValue();
16087
16088 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16089 "UINT_TO_FP is supported only with FPCVT");
16090
16091 // If we have FCFIDS, then use it when converting to single-precision.
16092 // Otherwise, convert to double-precision and then round.
16093 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16094 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16095                                                               : PPCISD::FCFIDS)
16096                    : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16097 : PPCISD::FCFID);
16098 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16099 ? MVT::f32
16100 : MVT::f64;
16101
16102   // If we're converting from a float to an int and back to a float again,
16103 // then we don't need the store/load pair at all.
16104 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16105 Subtarget.hasFPCVT()) ||
16106 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16107 SDValue Src = Op.getOperand(0).getOperand(0);
16108 if (Src.getValueType() == MVT::f32) {
16109 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16110 DCI.AddToWorklist(Src.getNode());
16111 } else if (Src.getValueType() != MVT::f64) {
16112 // Make sure that we don't pick up a ppc_fp128 source value.
16113 return SDValue();
16114 }
16115
16116 unsigned FCTOp =
16117 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16118                                                         PPCISD::FCTIDUZ;
16119
16120 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16121 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16122
16123 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16124 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16125 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16126 DCI.AddToWorklist(FP.getNode());
16127 }
16128
16129 return FP;
16130 }
16131
16132 return SDValue();
16133}
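// For illustration: (f64 sint_to_fp (i64 fp_to_sint f64:$x)) becomes
// (FCFID (FCTIDZ $x)), keeping the round-tripped value in floating-point
// registers instead of spilling it through a store/load pair.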
16134
16135// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16136// builtins) into loads with swaps.
16137 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16138                                               DAGCombinerInfo &DCI) const {
16139 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16140 // load combines.
16141 if (DCI.isBeforeLegalizeOps())
16142 return SDValue();
16143
16144 SelectionDAG &DAG = DCI.DAG;
16145 SDLoc dl(N);
16146 SDValue Chain;
16147 SDValue Base;
16148 MachineMemOperand *MMO;
16149
16150 switch (N->getOpcode()) {
16151 default:
16152 llvm_unreachable("Unexpected opcode for little endian VSX load");
16153 case ISD::LOAD: {
16154 LoadSDNode *LD = cast<LoadSDNode>(N);
16155 Chain = LD->getChain();
16156 Base = LD->getBasePtr();
16157 MMO = LD->getMemOperand();
16158 // If the MMO suggests this isn't a load of a full vector, leave
16159 // things alone. For a built-in, we have to make the change for
16160 // correctness, so if there is a size problem that will be a bug.
16161 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16162 return SDValue();
16163 break;
16164 }
16165   case ISD::INTRINSIC_W_CHAIN: {
16166     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16167 Chain = Intrin->getChain();
16168 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16169 // us what we want. Get operand 2 instead.
16170 Base = Intrin->getOperand(2);
16171 MMO = Intrin->getMemOperand();
16172 break;
16173 }
16174 }
16175
16176 MVT VecTy = N->getValueType(0).getSimpleVT();
16177
16178 SDValue LoadOps[] = { Chain, Base };
16179   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16180                                          DAG.getVTList(MVT::v2f64, MVT::Other),
16181 LoadOps, MVT::v2f64, MMO);
16182
16183 DCI.AddToWorklist(Load.getNode());
16184 Chain = Load.getValue(1);
16185 SDValue Swap = DAG.getNode(
16186 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16187 DCI.AddToWorklist(Swap.getNode());
16188
16189 // Add a bitcast if the resulting load type doesn't match v2f64.
16190 if (VecTy != MVT::v2f64) {
16191 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16192 DCI.AddToWorklist(N.getNode());
16193 // Package {bitcast value, swap's chain} to match Load's shape.
16194 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16195 N, Swap.getValue(1));
16196 }
16197
16198 return Swap;
16199}
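// For illustration: on a little-endian subtarget that needs swaps, a plain
// (v4i32 load addr) is rewritten to (bitcast (XXSWAPD (LXVD2X addr))), so the
// register ends up with the elements in the order the IR expects.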
16200
16201// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16202// builtins) into stores with swaps.
16203 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16204                                                DAGCombinerInfo &DCI) const {
16205 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16206 // store combines.
16207 if (DCI.isBeforeLegalizeOps())
16208 return SDValue();
16209
16210 SelectionDAG &DAG = DCI.DAG;
16211 SDLoc dl(N);
16212 SDValue Chain;
16213 SDValue Base;
16214 unsigned SrcOpnd;
16215 MachineMemOperand *MMO;
16216
16217 switch (N->getOpcode()) {
16218 default:
16219 llvm_unreachable("Unexpected opcode for little endian VSX store");
16220 case ISD::STORE: {
16221 StoreSDNode *ST = cast<StoreSDNode>(N);
16222 Chain = ST->getChain();
16223 Base = ST->getBasePtr();
16224 MMO = ST->getMemOperand();
16225 SrcOpnd = 1;
16226 // If the MMO suggests this isn't a store of a full vector, leave
16227 // things alone. For a built-in, we have to make the change for
16228 // correctness, so if there is a size problem that will be a bug.
16229 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16230 return SDValue();
16231 break;
16232 }
16233 case ISD::INTRINSIC_VOID: {
16234 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16235 Chain = Intrin->getChain();
16236 // Intrin->getBasePtr() oddly does not get what we want.
16237 Base = Intrin->getOperand(3);
16238 MMO = Intrin->getMemOperand();
16239 SrcOpnd = 2;
16240 break;
16241 }
16242 }
16243
16244 SDValue Src = N->getOperand(SrcOpnd);
16245 MVT VecTy = Src.getValueType().getSimpleVT();
16246
16247 // All stores are done as v2f64 and possible bit cast.
16248 if (VecTy != MVT::v2f64) {
16249 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16250 DCI.AddToWorklist(Src.getNode());
16251 }
16252
16253 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16254 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16255 DCI.AddToWorklist(Swap.getNode());
16256 Chain = Swap.getValue(1);
16257 SDValue StoreOps[] = { Chain, Swap, Base };
16258   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16259                                           DAG.getVTList(MVT::Other),
16260 StoreOps, VecTy, MMO);
16261 DCI.AddToWorklist(Store.getNode());
16262 return Store;
16263}
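// For illustration: on the same subtargets, (store v4i32:$v, addr) becomes
// (STXVD2X (XXSWAPD (bitcast v2f64 $v)), addr), mirroring the load expansion
// above.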
16264
16265// Handle DAG combine for STORE (FP_TO_INT F).
16266SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16267 DAGCombinerInfo &DCI) const {
16268 SelectionDAG &DAG = DCI.DAG;
16269 SDLoc dl(N);
16270 unsigned Opcode = N->getOperand(1).getOpcode();
16271 (void)Opcode;
16272 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16273
16274 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16275 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16276 && "Not a FP_TO_INT Instruction!");
16277
16278 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16279 EVT Op1VT = N->getOperand(1).getValueType();
16280 EVT ResVT = Val.getValueType();
16281
16282 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16283 return SDValue();
16284
16285 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16286 bool ValidTypeForStoreFltAsInt =
16287 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16288 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16289
16290 // TODO: Lower conversion from f128 on all VSX targets
16291 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16292 return SDValue();
16293
16294 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16295 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16296 return SDValue();
16297
16298 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16299
16300 // Set number of bytes being converted.
16301 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16302 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16303 DAG.getIntPtrConstant(ByteSize, dl, false),
16304 DAG.getValueType(Op1VT)};
16305
16306   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16307                                 DAG.getVTList(MVT::Other), Ops,
16308 cast<StoreSDNode>(N)->getMemoryVT(),
16309 cast<StoreSDNode>(N)->getMemOperand());
16310
16311 return Val;
16312}
16313
16314static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16315 // Check that the source of the element keeps flipping
16316 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16317 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16318 for (int i = 1, e = Mask.size(); i < e; i++) {
16319 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16320 return false;
16321 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16322 return false;
16323 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16324 }
16325 return true;
16326}
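// For illustration (hypothetical mask, NumElts == 8): <0,9,1,11,2,13,3,15>
// alternates between the first vector (indices < 8) and the second vector
// (indices >= 8), so the check succeeds; <0,1,9,11,2,3,13,15> does not.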
16327
16328static bool isSplatBV(SDValue Op) {
16329 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16330 return false;
16331 SDValue FirstOp;
16332
16333 // Find first non-undef input.
16334 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16335 FirstOp = Op.getOperand(i);
16336 if (!FirstOp.isUndef())
16337 break;
16338 }
16339
16340 // All inputs are undef or the same as the first non-undef input.
16341 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16342 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16343 return false;
16344 return true;
16345}
16346
16347 static SDValue isScalarToVec(SDValue Op) {
16348   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16349 return Op;
16350 if (Op.getOpcode() != ISD::BITCAST)
16351 return SDValue();
16352 Op = Op.getOperand(0);
16353 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16354 return Op;
16355 return SDValue();
16356}
16357
16358// Fix up the shuffle mask to account for the fact that the result of
16359// scalar_to_vector is not in lane zero. This just takes all values in
16360// the ranges specified by the min/max indices and adds the number of
16361// elements required to ensure each element comes from the respective
16362// position in the valid lane.
16363// On little endian, that's just the corresponding element in the other
16364// half of the vector. On big endian, it is in the same half but right
16365// justified rather than left justified in that half.
16366 static void fixupShuffleMaskForPermutedSToV(
16367     SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16368 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16369 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16370 int LHSEltFixup =
16371 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16372 int RHSEltFixup =
16373 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16374 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16375 int Idx = ShuffV[I];
16376 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16377 ShuffV[I] += LHSEltFixup;
16378 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16379 ShuffV[I] += RHSEltFixup;
16380 }
16381}
16382
16383// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16384// the original is:
16385// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16386// In such a case, just change the shuffle mask to extract the element
16387// from the permuted index.
16388 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16389                                const PPCSubtarget &Subtarget) {
16390 SDLoc dl(OrigSToV);
16391 EVT VT = OrigSToV.getValueType();
16392 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16393 "Expecting a SCALAR_TO_VECTOR here");
16394 SDValue Input = OrigSToV.getOperand(0);
16395
16396 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16397 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16398 SDValue OrigVector = Input.getOperand(0);
16399
16400 // Can't handle non-const element indices or different vector types
16401 // for the input to the extract and the output of the scalar_to_vector.
16402 if (Idx && VT == OrigVector.getValueType()) {
16403 unsigned NumElts = VT.getVectorNumElements();
16404 assert(
16405 NumElts > 1 &&
16406 "Cannot produce a permuted scalar_to_vector for one element vector");
16407 SmallVector<int, 16> NewMask(NumElts, -1);
16408 unsigned ResultInElt = NumElts / 2;
16409 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16410 NewMask[ResultInElt] = Idx->getZExtValue();
16411 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16412 }
16413 }
16414 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16415 OrigSToV.getOperand(0));
16416}
16417
16418 static bool isShuffleMaskInRange(const SmallVector<int, 16> &ShuffV,
16419                                  int HalfVec, int LHSLastElementDefined,
16420 int RHSLastElementDefined) {
16421 for (int Index : ShuffV) {
16422 if (Index < 0) // Skip explicitly undefined mask indices.
16423 continue;
16424 // Handle first input vector of the vector_shuffle.
16425 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16426 (Index > LHSLastElementDefined))
16427 return false;
16428 // Handle second input vector of the vector_shuffle.
16429 if ((RHSLastElementDefined >= 0) &&
16430 (Index > HalfVec + RHSLastElementDefined))
16431 return false;
16432 }
16433 return true;
16434}
16435
16436 static SDValue generateSToVPermutedForVecShuffle(
16437     int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16438 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16439 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16440 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16441 // Set up the values for the shuffle vector fixup.
16442 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16443 // The last element depends on if the input comes from the LHS or RHS.
16444 //
16445 // For example:
16446 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16447 //
16448 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16449 // because elements 1 and higher of a scalar_to_vector are undefined.
16450 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16451 // because elements 1 and higher of a scalar_to_vector are undefined.
16452 // It is also not 4 because the original scalar_to_vector is wider and
16453 // actually contains two i32 elements.
16454 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16455 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16456 : FirstElt;
16457 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16458 if (SToVPermuted.getValueType() != VecShuffOperandType)
16459 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16460 return SToVPermuted;
16461}
16462
16463// On little endian subtargets, combine shuffles such as:
16464// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16465// into:
16466// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16467// because the latter can be matched to a single instruction merge.
16468// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16469// to put the value into element zero. Adjust the shuffle mask so that the
16470// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16471// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16472// nodes with elements smaller than doubleword because all the ways
16473// of getting scalar data into a vector register put the value in the
16474// rightmost element of the left half of the vector.
16475SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16476 SelectionDAG &DAG) const {
16477 SDValue LHS = SVN->getOperand(0);
16478 SDValue RHS = SVN->getOperand(1);
16479 auto Mask = SVN->getMask();
16480 int NumElts = LHS.getValueType().getVectorNumElements();
16481 SDValue Res(SVN, 0);
16482 SDLoc dl(SVN);
16483 bool IsLittleEndian = Subtarget.isLittleEndian();
16484
16485 // On big endian targets this is only useful for subtargets with direct moves.
16486 // On little endian targets it would be useful for all subtargets with VSX.
16487 // However adding special handling for LE subtargets without direct moves
16488 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16489 // which includes direct moves.
16490 if (!Subtarget.hasDirectMove())
16491 return Res;
16492
16493 // If this is not a shuffle of a shuffle and the first element comes from
16494 // the second vector, canonicalize to the commuted form. This will make it
16495 // more likely to match one of the single instruction patterns.
16496 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16497 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16498 std::swap(LHS, RHS);
16499 Res = DAG.getCommutedVectorShuffle(*SVN);
16500 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16501 }
16502
16503 // Adjust the shuffle mask if either input vector comes from a
16504 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16505 // form (to prevent the need for a swap).
16506 SmallVector<int, 16> ShuffV(Mask);
16507 SDValue SToVLHS = isScalarToVec(LHS);
16508 SDValue SToVRHS = isScalarToVec(RHS);
16509 if (SToVLHS || SToVRHS) {
16510 EVT VT = SVN->getValueType(0);
16511 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16512 int ShuffleNumElts = ShuffV.size();
16513 int HalfVec = ShuffleNumElts / 2;
16514 // The width of the "valid lane" (i.e. the lane that contains the value that
16515 // is vectorized) needs to be expressed in terms of the number of elements
16516 // of the shuffle. It is thereby the ratio of the values before and after
16517 // any bitcast, which will be set later on if the LHS or RHS are
16518 // SCALAR_TO_VECTOR nodes.
16519 unsigned LHSNumValidElts = HalfVec;
16520 unsigned RHSNumValidElts = HalfVec;
16521
16522 // Initially assume that neither input is permuted. These will be adjusted
16523 // accordingly if either input is. Note, that -1 means that all elements
16524 // are undefined.
16525 int LHSFirstElt = 0;
16526 int RHSFirstElt = ShuffleNumElts;
16527 int LHSLastElt = -1;
16528 int RHSLastElt = -1;
16529
16530 // Get the permuted scalar to vector nodes for the source(s) that come from
16531 // ISD::SCALAR_TO_VECTOR.
16532 // On big endian systems, this only makes sense for element sizes smaller
16533 // than 64 bits since for 64-bit elements, all instructions already put
16534 // the value into element zero. Since scalar size of LHS and RHS may differ
16535 // after isScalarToVec, this should be checked using their own sizes.
16536 int LHSScalarSize = 0;
16537 int RHSScalarSize = 0;
16538 if (SToVLHS) {
16539 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16540 if (!IsLittleEndian && LHSScalarSize >= 64)
16541 return Res;
16542 }
16543 if (SToVRHS) {
16544 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16545 if (!IsLittleEndian && RHSScalarSize >= 64)
16546 return Res;
16547 }
16548 if (LHSScalarSize != 0)
16549       LHS = generateSToVPermutedForVecShuffle(
16550           LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
16551 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
16552 if (RHSScalarSize != 0)
16553       RHS = generateSToVPermutedForVecShuffle(
16554           RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
16555 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
16556
16557 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
16558 return Res;
16559
16560 // Fix up the shuffle mask to reflect where the desired element actually is.
16561 // The minimum and maximum indices that correspond to element zero for both
16562 // the LHS and RHS are computed and will control which shuffle mask entries
16563 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16564 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16565     fixupShuffleMaskForPermutedSToV(
16566         ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16567 LHSNumValidElts, RHSNumValidElts, Subtarget);
16568 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16569
16570 // We may have simplified away the shuffle. We won't be able to do anything
16571 // further with it here.
16572 if (!isa<ShuffleVectorSDNode>(Res))
16573 return Res;
16574 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16575 }
16576
16577 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16578 // The common case after we commuted the shuffle is that the RHS is a splat
16579 // and we have elements coming in from the splat at indices that are not
16580 // conducive to using a merge.
16581 // Example:
16582 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16583 if (!isSplatBV(TheSplat))
16584 return Res;
16585
16586 // We are looking for a mask such that all even elements are from
16587 // one vector and all odd elements from the other.
16588 if (!isAlternatingShuffMask(Mask, NumElts))
16589 return Res;
16590
16591 // Adjust the mask so we are pulling in the same index from the splat
16592 // as the index from the interesting vector in consecutive elements.
16593 if (IsLittleEndian) {
16594 // Example (even elements from first vector):
16595 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16596 if (Mask[0] < NumElts)
16597 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16598 if (ShuffV[i] < 0)
16599 continue;
16600 // If element from non-splat is undef, pick first element from splat.
16601 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16602 }
16603 // Example (odd elements from first vector):
16604 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16605 else
16606 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16607 if (ShuffV[i] < 0)
16608 continue;
16609 // If element from non-splat is undef, pick first element from splat.
16610 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16611 }
16612 } else {
16613 // Example (even elements from first vector):
16614 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16615 if (Mask[0] < NumElts)
16616 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16617 if (ShuffV[i] < 0)
16618 continue;
16619 // If element from non-splat is undef, pick first element from splat.
16620 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16621 }
16622 // Example (odd elements from first vector):
16623 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16624 else
16625 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16626 if (ShuffV[i] < 0)
16627 continue;
16628 // If element from non-splat is undef, pick first element from splat.
16629 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16630 }
16631 }
16632
16633 // If the RHS has undefs, we need to remove them since we may have created
16634 // a shuffle that adds those instead of the splat value.
16635 SDValue SplatVal =
16636 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16637 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16638
16639 if (IsLittleEndian)
16640 RHS = TheSplat;
16641 else
16642 LHS = TheSplat;
16643 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16644}
16645
16646SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16647 LSBaseSDNode *LSBase,
16648 DAGCombinerInfo &DCI) const {
16649 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16650 "Not a reverse memop pattern!");
16651
16652 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16653 auto Mask = SVN->getMask();
16654 int i = 0;
16655 auto I = Mask.rbegin();
16656 auto E = Mask.rend();
16657
16658 for (; I != E; ++I) {
16659 if (*I != i)
16660 return false;
16661 i++;
16662 }
16663 return true;
16664 };
16665
16666 SelectionDAG &DAG = DCI.DAG;
16667 EVT VT = SVN->getValueType(0);
16668
16669 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16670 return SDValue();
16671
16672   // Before Power9, the PPCVSXSwapRemoval pass is used to adjust the element
16673   // order. See the comment in PPCVSXSwapRemoval.cpp.
16674   // This combine conflicts with that optimization, so we don't do it there.
16675 if (!Subtarget.hasP9Vector())
16676 return SDValue();
16677
16678 if(!IsElementReverse(SVN))
16679 return SDValue();
16680
16681 if (LSBase->getOpcode() == ISD::LOAD) {
16682     // If result 0 of the load has any user other than the shufflevector
16683     // instruction, it is not profitable to replace the shufflevector with
16684     // a reverse load.
16685 for (SDUse &Use : LSBase->uses())
16686 if (Use.getResNo() == 0 &&
16687 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16688 return SDValue();
16689
16690 SDLoc dl(LSBase);
16691 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16692 return DAG.getMemIntrinsicNode(
16693 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16694 LSBase->getMemoryVT(), LSBase->getMemOperand());
16695 }
16696
16697 if (LSBase->getOpcode() == ISD::STORE) {
16698 // If there are other uses of the shuffle, the swap cannot be avoided.
16699 // Forcing the use of an X-Form (since swapped stores only have
16700 // X-Forms) without removing the swap is unprofitable.
16701 if (!SVN->hasOneUse())
16702 return SDValue();
16703
16704 SDLoc dl(LSBase);
16705 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16706 LSBase->getBasePtr()};
16707 return DAG.getMemIntrinsicNode(
16708 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16709 LSBase->getMemoryVT(), LSBase->getMemOperand());
16710 }
16711
16712 llvm_unreachable("Expected a load or store node here");
16713}
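// For illustration: on a little-endian Power9 subtarget, a v4i32 load whose
// only use is an element-reversing <3,2,1,0> shuffle is folded into a single
// LOAD_VEC_BE node, and the matching store pattern is folded into
// STORE_VEC_BE, removing the explicit reversal.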
16714
16715static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16716 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16717 if (IntrinsicID == Intrinsic::ppc_stdcx)
16718 StoreWidth = 8;
16719 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16720 StoreWidth = 4;
16721 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16722 StoreWidth = 2;
16723 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16724 StoreWidth = 1;
16725 else
16726 return false;
16727 return true;
16728}
16729
16730 static SDValue DAGCombineAddc(SDNode *N,
16731                               llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
16732   if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
16733 // (ADDC (ADDE 0, 0, C), -1) -> C
16734 SDValue LHS = N->getOperand(0);
16735 SDValue RHS = N->getOperand(1);
16736 if (LHS->getOpcode() == PPCISD::ADDE &&
16737 isNullConstant(LHS->getOperand(0)) &&
16738 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
16739 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
16740 }
16741 }
16742 return SDValue();
16743}
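// For illustration: in (ADDC (ADDE 0, 0, C), -1), the ADDE just reproduces the
// incoming carry C, and adding -1 to it produces a carry-out equal to C again,
// so the carry result of the ADDC can be replaced with C directly.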
16744
16745 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16746                                              DAGCombinerInfo &DCI) const {
16747 SelectionDAG &DAG = DCI.DAG;
16748 SDLoc dl(N);
16749 switch (N->getOpcode()) {
16750 default: break;
16751 case ISD::ADD:
16752 return combineADD(N, DCI);
16753 case ISD::AND: {
16754 // We don't want (and (zext (shift...)), C) if C fits in the width of the
16755 // original input as that will prevent us from selecting optimal rotates.
16756 // This only matters if the input to the extend is i32 widened to i64.
16757 SDValue Op1 = N->getOperand(0);
16758 SDValue Op2 = N->getOperand(1);
16759 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16760 Op1.getOpcode() != ISD::ANY_EXTEND) ||
16761 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
16762 Op1.getOperand(0).getValueType() != MVT::i32)
16763 break;
16764 SDValue NarrowOp = Op1.getOperand(0);
16765 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16766 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16767 break;
16768
16769 uint64_t Imm = Op2->getAsZExtVal();
16770 // Make sure that the constant is narrow enough to fit in the narrow type.
16771 if (!isUInt<32>(Imm))
16772 break;
16773 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
16774 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
16775 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
16776 }
16777 case ISD::SHL:
16778 return combineSHL(N, DCI);
16779 case ISD::SRA:
16780 return combineSRA(N, DCI);
16781 case ISD::SRL:
16782 return combineSRL(N, DCI);
16783 case ISD::MUL:
16784 return combineMUL(N, DCI);
16785 case ISD::FMA:
16786 case PPCISD::FNMSUB:
16787 return combineFMALike(N, DCI);
16788 case PPCISD::SHL:
16789 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
16790 return N->getOperand(0);
16791 break;
16792 case PPCISD::SRL:
16793 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
16794 return N->getOperand(0);
16795 break;
16796 case PPCISD::SRA:
16797 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
16798 if (C->isZero() || // 0 >>s V -> 0.
16799 C->isAllOnes()) // -1 >>s V -> -1.
16800 return N->getOperand(0);
16801 }
16802 break;
16803 case ISD::SIGN_EXTEND:
16804 case ISD::ZERO_EXTEND:
16805 case ISD::ANY_EXTEND:
16806 return DAGCombineExtBoolTrunc(N, DCI);
16807 case ISD::TRUNCATE:
16808 return combineTRUNCATE(N, DCI);
16809 case ISD::SETCC:
16810 if (SDValue CSCC = combineSetCC(N, DCI))
16811 return CSCC;
16812 [[fallthrough]];
16813 case ISD::SELECT_CC:
16814 return DAGCombineTruncBoolExt(N, DCI);
16815 case ISD::SINT_TO_FP:
16816 case ISD::UINT_TO_FP:
16817 return combineFPToIntToFP(N, DCI);
16818   case ISD::VECTOR_SHUFFLE:
16819     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
16820 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
16821 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
16822 }
16823 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
16824 case ISD::STORE: {
16825
16826 EVT Op1VT = N->getOperand(1).getValueType();
16827 unsigned Opcode = N->getOperand(1).getOpcode();
16828
16829 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16830 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16831 SDValue Val = combineStoreFPToInt(N, DCI);
16832 if (Val)
16833 return Val;
16834 }
16835
16836 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16837 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
16838 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
16839 if (Val)
16840 return Val;
16841 }
16842
16843 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
16844 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
16845 N->getOperand(1).getNode()->hasOneUse() &&
16846 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16847 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16848
16849       // STBRX can only handle simple types, and it makes no sense to store fewer
16850       // than two bytes in byte-reversed order.
16851 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
16852 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16853 break;
16854
16855 SDValue BSwapOp = N->getOperand(1).getOperand(0);
16856 // Do an any-extend to 32-bits if this is a half-word input.
16857 if (BSwapOp.getValueType() == MVT::i16)
16858 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
16859
16860       // If the type of the BSWAP operand is wider than the stored memory width,
16861       // it needs to be shifted right before the STBRX.
16862 if (Op1VT.bitsGT(mVT)) {
16863 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16864 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
16865 DAG.getConstant(Shift, dl, MVT::i32));
16866 // Need to truncate if this is a bswap of i64 stored as i32/i16.
16867 if (Op1VT == MVT::i64)
16868 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
16869 }
16870
16871 SDValue Ops[] = {
16872 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
16873 };
16874 return
16875 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
16876 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
16877 cast<StoreSDNode>(N)->getMemOperand());
16878 }
16879
16880 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
16881 // So it can increase the chance of CSE constant construction.
16882 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16883 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
16884       // Need to sign-extend to 64 bits to handle negative values.
16885 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
16886 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
16887 MemVT.getSizeInBits());
16888 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
16889
16890 auto *ST = cast<StoreSDNode>(N);
16891 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
16892 ST->getBasePtr(), ST->getOffset(), MemVT,
16893 ST->getMemOperand(), ST->getAddressingMode(),
16894 /*IsTruncating=*/true);
16895 // Note we use CombineTo here to prevent DAGCombiner from visiting the
16896 // new store which will change the constant by removing non-demanded bits.
16897 return ST->isUnindexed()
16898 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
16899 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
16900 }
16901
16902 // For little endian, VSX stores require generating xxswapd/lxvd2x.
16903 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16904 if (Op1VT.isSimple()) {
16905 MVT StoreVT = Op1VT.getSimpleVT();
16906 if (Subtarget.needsSwapsForVSXMemOps() &&
16907 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16908 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16909 return expandVSXStoreForLE(N, DCI);
16910 }
16911 break;
16912 }
16913 case ISD::LOAD: {
16914 LoadSDNode *LD = cast<LoadSDNode>(N);
16915 EVT VT = LD->getValueType(0);
16916
16917 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16918 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16919 if (VT.isSimple()) {
16920 MVT LoadVT = VT.getSimpleVT();
16921 if (Subtarget.needsSwapsForVSXMemOps() &&
16922 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16923 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16924 return expandVSXLoadForLE(N, DCI);
16925 }
16926
16927 // We sometimes end up with a 64-bit integer load, from which we extract
16928 // two single-precision floating-point numbers. This happens with
16929 // std::complex<float>, and other similar structures, because of the way we
16930 // canonicalize structure copies. However, if we lack direct moves,
16931 // then the final bitcasts from the extracted integer values to the
16932 // floating-point numbers turn into store/load pairs. Even with direct moves,
16933 // just loading the two floating-point numbers is likely better.
16934 auto ReplaceTwoFloatLoad = [&]() {
16935 if (VT != MVT::i64)
16936 return false;
16937
16938 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16939 LD->isVolatile())
16940 return false;
16941
16942 // We're looking for a sequence like this:
16943 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16944 // t16: i64 = srl t13, Constant:i32<32>
16945 // t17: i32 = truncate t16
16946 // t18: f32 = bitcast t17
16947 // t19: i32 = truncate t13
16948 // t20: f32 = bitcast t19
16949
16950 if (!LD->hasNUsesOfValue(2, 0))
16951 return false;
16952
16953 auto UI = LD->user_begin();
16954 while (UI.getUse().getResNo() != 0) ++UI;
16955 SDNode *Trunc = *UI++;
16956 while (UI.getUse().getResNo() != 0) ++UI;
16957 SDNode *RightShift = *UI;
16958 if (Trunc->getOpcode() != ISD::TRUNCATE)
16959 std::swap(Trunc, RightShift);
16960
16961 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16962 Trunc->getValueType(0) != MVT::i32 ||
16963 !Trunc->hasOneUse())
16964 return false;
16965 if (RightShift->getOpcode() != ISD::SRL ||
16966 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16967 RightShift->getConstantOperandVal(1) != 32 ||
16968 !RightShift->hasOneUse())
16969 return false;
16970
16971 SDNode *Trunc2 = *RightShift->user_begin();
16972 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16973 Trunc2->getValueType(0) != MVT::i32 ||
16974 !Trunc2->hasOneUse())
16975 return false;
16976
16977 SDNode *Bitcast = *Trunc->user_begin();
16978 SDNode *Bitcast2 = *Trunc2->user_begin();
16979
16980 if (Bitcast->getOpcode() != ISD::BITCAST ||
16981 Bitcast->getValueType(0) != MVT::f32)
16982 return false;
16983 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16984 Bitcast2->getValueType(0) != MVT::f32)
16985 return false;
16986
16987 if (Subtarget.isLittleEndian())
16988 std::swap(Bitcast, Bitcast2);
16989
16990 // Bitcast has the second float (in memory-layout order) and Bitcast2
16991 // has the first one.
16992
16993 SDValue BasePtr = LD->getBasePtr();
16994 if (LD->isIndexed()) {
16995 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16996 "Non-pre-inc AM on PPC?");
16997 BasePtr =
16998 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16999 LD->getOffset());
17000 }
17001
17002 auto MMOFlags =
17003 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17004 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17005 LD->getPointerInfo(), LD->getAlign(),
17006 MMOFlags, LD->getAAInfo());
17007 SDValue AddPtr =
17008 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17009 BasePtr, DAG.getIntPtrConstant(4, dl));
17010 SDValue FloatLoad2 = DAG.getLoad(
17011 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17012 LD->getPointerInfo().getWithOffset(4),
17013 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17014
17015 if (LD->isIndexed()) {
17016 // Note that DAGCombine should re-form any pre-increment load(s) from
17017 // what is produced here if that makes sense.
17018 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17019 }
17020
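// Replace each bitcast with the corresponding f32 load, and redirect the
// original load's chain users to the chain of the second new load.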
17021 DCI.CombineTo(Bitcast2, FloatLoad);
17022 DCI.CombineTo(Bitcast, FloatLoad2);
17023
17024 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17025 SDValue(FloatLoad2.getNode(), 1));
17026 return true;
17027 };
17028
17029 if (ReplaceTwoFloatLoad())
17030 return SDValue(N, 0);
17031
17032 EVT MemVT = LD->getMemoryVT();
17033 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17034 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17035 if (LD->isUnindexed() && VT.isVector() &&
17036 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17037 // P8 and later hardware should just use LOAD.
17038 !Subtarget.hasP8Vector() &&
17039 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17040 VT == MVT::v4f32))) &&
17041 LD->getAlign() < ABIAlignment) {
17042 // This is a type-legal unaligned Altivec load.
17043 SDValue Chain = LD->getChain();
17044 SDValue Ptr = LD->getBasePtr();
17045 bool isLittleEndian = Subtarget.isLittleEndian();
17046
17047 // This implements the loading of unaligned vectors as described in
17048 // the venerable Apple Velocity Engine overview. Specifically:
17049 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17050 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17051 //
17052 // The general idea is to expand a sequence of one or more unaligned
17053 // loads into an alignment-based permutation-control instruction (lvsl
17054 // or lvsr), a series of regular vector loads (which always truncate
17055 // their input address to an aligned address), and a series of
17056 // permutations. The results of these permutations are the requested
17057 // loaded values. The trick is that the last "extra" load is not taken
17058 // from the address you might suspect (sizeof(vector) bytes after the
17059 // last requested load), but rather sizeof(vector) - 1 bytes after the
17060 // last requested vector. The point of this is to avoid a page fault if
17061 // the base address happened to be aligned. This works because if the
17062 // base address is aligned, then adding less than a full vector length
17063 // will cause the last vector in the sequence to be (re)loaded.
17064 // Otherwise, the extra load still fetches the next aligned vector, as is
17065 // necessary.
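// For example, a v4i32 load from a pointer P with P % 16 == 4 becomes:
//   lvsl/lvsr on P to form the permute control vector,
//   lvx of P (which loads the aligned 16 bytes containing P),
//   lvx of P + 15 (or P + 16 when a consecutive load at the aligned offset
//   already exists; see IncValue below),
//   vperm to extract the requested 16 bytes from the two aligned loads.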
17066
17067 // We might be able to reuse the permutation generation from
17068 // a different base address offset from this one by an aligned amount.
17069 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17070 // optimization later.
17071 Intrinsic::ID Intr, IntrLD, IntrPerm;
17072 MVT PermCntlTy, PermTy, LDTy;
17073 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17074 : Intrinsic::ppc_altivec_lvsl;
17075 IntrLD = Intrinsic::ppc_altivec_lvx;
17076 IntrPerm = Intrinsic::ppc_altivec_vperm;
17077 PermCntlTy = MVT::v16i8;
17078 PermTy = MVT::v4i32;
17079 LDTy = MVT::v4i32;
17080
17081 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17082
17083 // Create the new MMO for the new base load. It is like the original MMO,
17084 // but represents an area in memory almost twice the vector size centered
17085 // on the original address. If the address is unaligned, we might start
17086 // reading up to (sizeof(vector)-1) bytes below the address of the
17087 // original unaligned load.
17088 MachineFunction &MF = DAG.getMachineFunction();
17089 MachineMemOperand *BaseMMO =
17090 MF.getMachineMemOperand(LD->getMemOperand(),
17091 -(int64_t)MemVT.getStoreSize()+1,
17092 2*MemVT.getStoreSize()-1);
17093
17094 // Create the new base load.
17095 SDValue LDXIntID =
17096 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17097 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17098 SDValue BaseLoad =
17099 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17100 DAG.getVTList(PermTy, MVT::Other),
17101 BaseLoadOps, LDTy, BaseMMO);
17102
17103 // Note that the value of IncOffset (which is provided to the next
17104 // load's pointer info offset value, and thus used to calculate the
17105 // alignment), and the value of IncValue (which is actually used to
17106 // increment the pointer value) are different! This is because we
17107 // require the next load to appear to be aligned, even though it
17108 // is actually offset from the base pointer by a lesser amount.
17109 int IncOffset = VT.getSizeInBits() / 8;
17110 int IncValue = IncOffset;
17111
17112 // Walk (both up and down) the chain looking for another load at the real
17113 // (aligned) offset (the alignment of the other load does not matter in
17114 // this case). If found, then do not use the offset reduction trick, as
17115 // that will prevent the loads from being later combined (as they would
17116 // otherwise be duplicates).
17117 if (!findConsecutiveLoad(LD, DAG))
17118 --IncValue;
17119
17120 SDValue Increment =
17121 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17122 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17123
17124 MachineMemOperand *ExtraMMO =
17125 MF.getMachineMemOperand(LD->getMemOperand(),
17126 1, 2*MemVT.getStoreSize()-1);
17127 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17128 SDValue ExtraLoad =
17129 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17130 DAG.getVTList(PermTy, MVT::Other),
17131 ExtraLoadOps, LDTy, ExtraMMO);
17132
17133 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17134 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17135
17136 // Because vperm has a big-endian bias, we must reverse the order
17137 // of the input vectors and complement the permute control vector
17138 // when generating little endian code. We have already handled the
17139 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17140 // and ExtraLoad here.
17141 SDValue Perm;
17142 if (isLittleEndian)
17143 Perm = BuildIntrinsicOp(IntrPerm,
17144 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17145 else
17146 Perm = BuildIntrinsicOp(IntrPerm,
17147 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17148
17149 if (VT != PermTy)
17150 Perm = Subtarget.hasAltivec()
17151 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17152 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17153 DAG.getTargetConstant(1, dl, MVT::i64));
17154 // second argument is 1 because this rounding
17155 // is always exact.
17156
17157 // The output of the permutation is our loaded result, the TokenFactor is
17158 // our new chain.
17159 DCI.CombineTo(N, Perm, TF);
17160 return SDValue(N, 0);
17161 }
17162 }
17163 break;
17164 case ISD::INTRINSIC_WO_CHAIN: {
17165 bool isLittleEndian = Subtarget.isLittleEndian();
17166 unsigned IID = N->getConstantOperandVal(0);
17167 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17168 : Intrinsic::ppc_altivec_lvsl);
17169 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17170 SDValue Add = N->getOperand(1);
17171
17172 int Bits = 4 /* 16 byte alignment */;
17173
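// If the low four bits of the addend are known to be zero, the ADD does not
// change the address modulo 16, so an lvsl/lvsr of the un-added base pointer
// produces the same permute control vector; reuse an existing one if found.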
17174 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17175 APInt::getAllOnes(Bits /* alignment */)
17176 .zext(Add.getScalarValueSizeInBits()))) {
17177 SDNode *BasePtr = Add->getOperand(0).getNode();
17178 for (SDNode *U : BasePtr->users()) {
17179 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17180 U->getConstantOperandVal(0) == IID) {
17181 // We've found another LVSL/LVSR, and this address is an aligned
17182 // multiple of that one. The results will be the same, so use the
17183 // one we've just found instead.
17184
17185 return SDValue(U, 0);
17186 }
17187 }
17188 }
17189
17190 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17191 SDNode *BasePtr = Add->getOperand(0).getNode();
17192 for (SDNode *U : BasePtr->users()) {
17193 if (U->getOpcode() == ISD::ADD &&
17194 isa<ConstantSDNode>(U->getOperand(1)) &&
17195 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17196 (1ULL << Bits) ==
17197 0) {
17198 SDNode *OtherAdd = U;
17199 for (SDNode *V : OtherAdd->users()) {
17200 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17201 V->getConstantOperandVal(0) == IID) {
17202 return SDValue(V, 0);
17203 }
17204 }
17205 }
17206 }
17207 }
17208 }
17209
17210 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17211 // Expose the vabsduw/h/b opportunity for downstream passes.
17212 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17213 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17214 IID == Intrinsic::ppc_altivec_vmaxsh ||
17215 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17216 SDValue V1 = N->getOperand(1);
17217 SDValue V2 = N->getOperand(2);
17218 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17219 V1.getSimpleValueType() == MVT::v8i16 ||
17220 V1.getSimpleValueType() == MVT::v16i8) &&
17221 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17222 // (0-a, a)
17223 if (V1.getOpcode() == ISD::SUB &&
17224 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
17225 V1.getOperand(1) == V2) {
17226 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17227 }
17228 // (a, 0-a)
17229 if (V2.getOpcode() == ISD::SUB &&
17230 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
17231 V2.getOperand(1) == V1) {
17232 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17233 }
17234 // (x-y, y-x)
17235 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17236 V1.getOperand(0) == V2.getOperand(1) &&
17237 V1.getOperand(1) == V2.getOperand(0)) {
17238 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17239 }
17240 }
17241 }
17242 }
17243
17244 break;
17245 case ISD::INTRINSIC_W_CHAIN:
17246 switch (N->getConstantOperandVal(1)) {
17247 default:
17248 break;
17249 case Intrinsic::ppc_altivec_vsum4sbs:
17250 case Intrinsic::ppc_altivec_vsum4shs:
17251 case Intrinsic::ppc_altivec_vsum4ubs: {
17252 // These sum-across intrinsics only have a chain due to the side effect
17253 // that they may set the SAT bit. If we know the SAT bit will not be set
17254 // for some inputs, we can replace any uses of their chain with the
17255 // input chain.
17256 if (BuildVectorSDNode *BVN =
17257 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17258 APInt APSplatBits, APSplatUndef;
17259 unsigned SplatBitSize;
17260 bool HasAnyUndefs;
17261 bool BVNIsConstantSplat = BVN->isConstantSplat(
17262 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17263 !Subtarget.isLittleEndian());
17264 // If the constant splat vector is 0, the SAT bit will not be set.
17265 if (BVNIsConstantSplat && APSplatBits == 0)
17266 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17267 }
17268 return SDValue();
17269 }
17270 case Intrinsic::ppc_vsx_lxvw4x:
17271 case Intrinsic::ppc_vsx_lxvd2x:
17272 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17273 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17274 if (Subtarget.needsSwapsForVSXMemOps())
17275 return expandVSXLoadForLE(N, DCI);
17276 break;
17277 }
17278 break;
17279 case ISD::INTRINSIC_VOID:
17280 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17281 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17282 if (Subtarget.needsSwapsForVSXMemOps()) {
17283 switch (N->getConstantOperandVal(1)) {
17284 default:
17285 break;
17286 case Intrinsic::ppc_vsx_stxvw4x:
17287 case Intrinsic::ppc_vsx_stxvd2x:
17288 return expandVSXStoreForLE(N, DCI);
17289 }
17290 }
17291 break;
17292 case ISD::BSWAP: {
17293 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17294 // For subtargets without LDBRX, we can still do better than the default
17295 // expansion even for 64-bit BSWAP (LOAD).
17296 bool Is64BitBswapOn64BitTgt =
17297 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17298 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17299 N->getOperand(0).hasOneUse();
17300 if (IsSingleUseNormalLd &&
17301 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17302 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17303 SDValue Load = N->getOperand(0);
17304 LoadSDNode *LD = cast<LoadSDNode>(Load);
17305 // Create the byte-swapping load.
17306 SDValue Ops[] = {
17307 LD->getChain(), // Chain
17308 LD->getBasePtr(), // Ptr
17309 DAG.getValueType(N->getValueType(0)) // VT
17310 };
17311 SDValue BSLoad =
17312 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
17313 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
17314 MVT::i64 : MVT::i32, MVT::Other),
17315 Ops, LD->getMemoryVT(), LD->getMemOperand());
17316
17317 // If this is an i16 load, insert the truncate.
17318 SDValue ResVal = BSLoad;
17319 if (N->getValueType(0) == MVT::i16)
17320 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
17321
17322 // First, combine the bswap away. This makes the value produced by the
17323 // load dead.
17324 DCI.CombineTo(N, ResVal);
17325
17326 // Next, combine the load away; we give it a bogus result value but a real
17327 // chain result. The result value is dead because the bswap is dead.
17328 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
17329
17330 // Return N so it doesn't get rechecked!
17331 return SDValue(N, 0);
17332 }
17333 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17334 // before legalization so that the BUILD_PAIR is handled correctly.
17335 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17336 !IsSingleUseNormalLd)
17337 return SDValue();
17338 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
17339
17340 // Can't split volatile or atomic loads.
17341 if (!LD->isSimple())
17342 return SDValue();
17343 SDValue BasePtr = LD->getBasePtr();
17344 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
17345 LD->getPointerInfo(), LD->getAlign());
17346 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
17347 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17348 DAG.getIntPtrConstant(4, dl));
17349 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
17350 LD->getMemOperand(), 4, 4);
17351 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
17352 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
17353 SDValue Res;
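// ISD::BUILD_PAIR takes (low half, high half). The byte-swapped word loaded
// from the lower address (Lo) becomes the high half of the result on
// little-endian targets and the low half on big-endian targets.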
17354 if (Subtarget.isLittleEndian())
17355 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
17356 else
17357 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
17358 SDValue TF =
17359 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17360 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
17361 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
17362 return Res;
17363 }
17364 case PPCISD::VCMP:
17365 // If a VCMP_rec node already exists with exactly the same operands as this
17366 // node, use its result instead of this node (VCMP_rec computes both a CR6
17367 // and a normal output).
17368 //
17369 if (!N->getOperand(0).hasOneUse() &&
17370 !N->getOperand(1).hasOneUse() &&
17371 !N->getOperand(2).hasOneUse()) {
17372
17373 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17374 SDNode *VCMPrecNode = nullptr;
17375
17376 SDNode *LHSN = N->getOperand(0).getNode();
17377 for (SDNode *User : LHSN->users())
17378 if (User->getOpcode() == PPCISD::VCMP_rec &&
17379 User->getOperand(1) == N->getOperand(1) &&
17380 User->getOperand(2) == N->getOperand(2) &&
17381 User->getOperand(0) == N->getOperand(0)) {
17382 VCMPrecNode = User;
17383 break;
17384 }
17385
17386 // If there is no VCMP_rec node, or if the flag value is unused, don't
17387 // transform this.
17388 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
17389 break;
17390
17391 // Look at the (necessarily single) use of the flag value. If it has a
17392 // chain, this transformation is more complex. Note that multiple things
17393 // could use the value result, which we should ignore.
17394 SDNode *FlagUser = nullptr;
17395 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17396 FlagUser == nullptr; ++UI) {
17397 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17398 SDNode *User = UI->getUser();
17399 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17400 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
17401 FlagUser = User;
17402 break;
17403 }
17404 }
17405 }
17406
17407 // If the user is a MFOCRF instruction, we know this is safe.
17408 // Otherwise we give up for right now.
17409 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17410 return SDValue(VCMPrecNode, 0);
17411 }
17412 break;
17413 case ISD::BR_CC: {
17414 // If this is a branch on an altivec predicate comparison, lower this so
17415 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17416 // lowering is done pre-legalize, because the legalizer lowers the predicate
17417 // compare down to code that is difficult to reassemble.
17418 // This code also handles branches that depend on the result of a store
17419 // conditional.
17420 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17421 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
17422
17423 int CompareOpc;
17424 bool isDot;
17425
17426 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17427 break;
17428
17429 // Since we are doing this pre-legalize, the RHS can be a constant of
17430 // arbitrary bitwidth which may cause issues when trying to get the value
17431 // from the underlying APInt.
17432 auto RHSAPInt = RHS->getAsAPIntVal();
17433 if (!RHSAPInt.isIntN(64))
17434 break;
17435
17436 unsigned Val = RHSAPInt.getZExtValue();
17437 auto isImpossibleCompare = [&]() {
17438 // If this is a comparison against something other than 0/1, then we know
17439 // that the condition is never/always true.
17440 if (Val != 0 && Val != 1) {
17441 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17442 return N->getOperand(0);
17443 // Always !=, turn it into an unconditional branch.
17444 return DAG.getNode(ISD::BR, dl, MVT::Other,
17445 N->getOperand(0), N->getOperand(4));
17446 }
17447 return SDValue();
17448 };
17449 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17450 unsigned StoreWidth = 0;
17451 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17452 isStoreConditional(LHS, StoreWidth)) {
17453 if (SDValue Impossible = isImpossibleCompare())
17454 return Impossible;
17455 PPC::Predicate CompOpc;
17456 // eq 0 => ne
17457 // ne 0 => eq
17458 // eq 1 => eq
17459 // ne 1 => ne
17460 if (Val == 0)
17461 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17462 else
17463 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17464
17465 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
17466 DAG.getConstant(StoreWidth, dl, MVT::i32)};
17467 auto *MemNode = cast<MemSDNode>(LHS);
17468 SDValue ConstSt = DAG.getMemIntrinsicNode(
17469 PPCISD::STORE_COND, dl,
17470 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
17471 MemNode->getMemoryVT(), MemNode->getMemOperand());
17472
17473 SDValue InChain;
17474 // Unchain the branch from the original store conditional.
17475 if (N->getOperand(0) == LHS.getValue(1))
17476 InChain = LHS.getOperand(0);
17477 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
17478 SmallVector<SDValue, 4> InChains;
17479 SDValue InTF = N->getOperand(0);
17480 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17481 if (InTF.getOperand(i) != LHS.getValue(1))
17482 InChains.push_back(InTF.getOperand(i));
17483 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
17484 }
17485
17486 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
17487 DAG.getConstant(CompOpc, dl, MVT::i32),
17488 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
17489 ConstSt.getValue(2));
17490 }
17491
17492 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17493 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
17494 assert(isDot && "Can't compare against a vector result!");
17495
17496 if (SDValue Impossible = isImpossibleCompare())
17497 return Impossible;
17498
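// BranchOnWhenPredTrue is set when the branch should be taken exactly when
// the predicate intrinsic would return a nonzero value, i.e. when branching
// on 'result == 1' or on 'result != 0'.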
17499 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17500 // Create the PPCISD altivec 'dot' comparison node.
17501 SDValue Ops[] = {
17502 LHS.getOperand(2), // LHS of compare
17503 LHS.getOperand(3), // RHS of compare
17504 DAG.getConstant(CompareOpc, dl, MVT::i32)
17505 };
17506 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
17507 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
17508
17509 // Unpack the result based on how the target uses it.
17510 PPC::Predicate CompOpc;
17511 switch (LHS.getConstantOperandVal(1)) {
17512 default: // Can't happen, don't crash on invalid number though.
17513 case 0: // Branch on the value of the EQ bit of CR6.
17514 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17515 break;
17516 case 1: // Branch on the inverted value of the EQ bit of CR6.
17517 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17518 break;
17519 case 2: // Branch on the value of the LT bit of CR6.
17520 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17521 break;
17522 case 3: // Branch on the inverted value of the LT bit of CR6.
17523 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17524 break;
17525 }
17526
17527 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
17528 DAG.getConstant(CompOpc, dl, MVT::i32),
17529 DAG.getRegister(PPC::CR6, MVT::i32),
17530 N->getOperand(4), CompNode.getValue(1));
17531 }
17532 break;
17533 }
17534 case ISD::BUILD_VECTOR:
17535 return DAGCombineBuildVector(N, DCI);
17536 case PPCISD::ADDC:
17537 return DAGCombineAddc(N, DCI);
17538 }
17539
17540 return SDValue();
17541}
17542
17543SDValue
17544 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17545 SelectionDAG &DAG,
17546 SmallVectorImpl<SDNode *> &Created) const {
17547 // fold (sdiv X, pow2)
17548 EVT VT = N->getValueType(0);
17549 if (VT == MVT::i64 && !Subtarget.isPPC64())
17550 return SDValue();
17551 if ((VT != MVT::i32 && VT != MVT::i64) ||
17552 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17553 return SDValue();
17554
17555 SDLoc DL(N);
17556 SDValue N0 = N->getOperand(0);
17557
17558 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17559 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17560 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
17561
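// PPCISD::SRA_ADDZE lowers to an arithmetic shift right (srawi/sradi), which
// sets the carry bit when the dividend is negative and nonzero bits are
// shifted out, followed by addze to add that carry back in; together these
// round the signed division toward zero.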
17562 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
17563 Created.push_back(Op.getNode());
17564
17565 if (IsNegPow2) {
17566 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
17567 Created.push_back(Op.getNode());
17568 }
17569
17570 return Op;
17571}
17572
17573//===----------------------------------------------------------------------===//
17574// Inline Assembly Support
17575//===----------------------------------------------------------------------===//
17576
17577 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17578 KnownBits &Known,
17579 const APInt &DemandedElts,
17580 const SelectionDAG &DAG,
17581 unsigned Depth) const {
17582 Known.resetAll();
17583 switch (Op.getOpcode()) {
17584 default: break;
17585 case PPCISD::LBRX: {
17586 // lhbrx is known to have the top bits cleared out.
17587 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
17588 Known.Zero = 0xFFFF0000;
17589 break;
17590 }
17591 case PPCISD::ADDE: {
17592 if (Op.getResNo() == 0) {
17593 // (0|1), _ = ADDE 0, 0, CARRY
17594 SDValue LHS = Op.getOperand(0);
17595 SDValue RHS = Op.getOperand(1);
17596 if (isNullConstant(LHS) && isNullConstant(RHS))
17597 Known.Zero = ~1ULL;
17598 }
17599 break;
17600 }
17601 case ISD::INTRINSIC_WO_CHAIN: {
17602 switch (Op.getConstantOperandVal(0)) {
17603 default: break;
17604 case Intrinsic::ppc_altivec_vcmpbfp_p:
17605 case Intrinsic::ppc_altivec_vcmpeqfp_p:
17606 case Intrinsic::ppc_altivec_vcmpequb_p:
17607 case Intrinsic::ppc_altivec_vcmpequh_p:
17608 case Intrinsic::ppc_altivec_vcmpequw_p:
17609 case Intrinsic::ppc_altivec_vcmpequd_p:
17610 case Intrinsic::ppc_altivec_vcmpequq_p:
17611 case Intrinsic::ppc_altivec_vcmpgefp_p:
17612 case Intrinsic::ppc_altivec_vcmpgtfp_p:
17613 case Intrinsic::ppc_altivec_vcmpgtsb_p:
17614 case Intrinsic::ppc_altivec_vcmpgtsh_p:
17615 case Intrinsic::ppc_altivec_vcmpgtsw_p:
17616 case Intrinsic::ppc_altivec_vcmpgtsd_p:
17617 case Intrinsic::ppc_altivec_vcmpgtsq_p:
17618 case Intrinsic::ppc_altivec_vcmpgtub_p:
17619 case Intrinsic::ppc_altivec_vcmpgtuh_p:
17620 case Intrinsic::ppc_altivec_vcmpgtuw_p:
17621 case Intrinsic::ppc_altivec_vcmpgtud_p:
17622 case Intrinsic::ppc_altivec_vcmpgtuq_p:
17623 Known.Zero = ~1U; // All bits but the low one are known to be zero.
17624 break;
17625 }
17626 break;
17627 }
17628 case ISD::INTRINSIC_W_CHAIN: {
17629 switch (Op.getConstantOperandVal(1)) {
17630 default:
17631 break;
17632 case Intrinsic::ppc_load2r:
17633 // Top bits are cleared for load2r (which is the same as lhbrx).
17634 Known.Zero = 0xFFFF0000;
17635 break;
17636 }
17637 break;
17638 }
17639 }
17640}
17641
17643 switch (Subtarget.getCPUDirective()) {
17644 default: break;
17645 case PPC::DIR_970:
17646 case PPC::DIR_PWR4:
17647 case PPC::DIR_PWR5:
17648 case PPC::DIR_PWR5X:
17649 case PPC::DIR_PWR6:
17650 case PPC::DIR_PWR6X:
17651 case PPC::DIR_PWR7:
17652 case PPC::DIR_PWR8:
17653 case PPC::DIR_PWR9:
17654 case PPC::DIR_PWR10:
17655 case PPC::DIR_PWR11:
17656 case PPC::DIR_PWR_FUTURE: {
17657 if (!ML)
17658 break;
17659
17660 if (!DisableInnermostLoopAlign32) {
17661 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17662 // so that we can decrease cache misses and branch-prediction misses.
17663 // Actual alignment of the loop will depend on the hotness check and other
17664 // logic in alignBlocks.
17665 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17666 return Align(32);
17667 }
17668
17669 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17670
17671 // For small loops (between 5 and 8 instructions), align to a 32-byte
17672 // boundary so that the entire loop fits in one instruction-cache line.
17673 uint64_t LoopSize = 0;
17674 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17675 for (const MachineInstr &J : **I) {
17676 LoopSize += TII->getInstSizeInBytes(J);
17677 if (LoopSize > 32)
17678 break;
17679 }
17680
17681 if (LoopSize > 16 && LoopSize <= 32)
17682 return Align(32);
17683
17684 break;
17685 }
17686 }
17687
17689}
17690
17691/// getConstraintType - Given a constraint, return the type of
17692/// constraint it is for this target.
17693 PPCTargetLowering::ConstraintType
17694 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17695 if (Constraint.size() == 1) {
17696 switch (Constraint[0]) {
17697 default: break;
17698 case 'b':
17699 case 'r':
17700 case 'f':
17701 case 'd':
17702 case 'v':
17703 case 'y':
17704 return C_RegisterClass;
17705 case 'Z':
17706 // FIXME: While Z does indicate a memory constraint, it specifically
17707 // indicates an r+r address (used in conjunction with the 'y' modifier
17708 // in the replacement string). Currently, we're forcing the base
17709 // register to be r0 in the asm printer (which is interpreted as zero)
17710 // and forming the complete address in the second register. This is
17711 // suboptimal.
17712 return C_Memory;
17713 }
17714 } else if (Constraint == "wc") { // individual CR bits.
17715 return C_RegisterClass;
17716 } else if (Constraint == "wa" || Constraint == "wd" ||
17717 Constraint == "wf" || Constraint == "ws" ||
17718 Constraint == "wi" || Constraint == "ww") {
17719 return C_RegisterClass; // VSX registers.
17720 }
17721 return TargetLowering::getConstraintType(Constraint);
17722}
17723
17724/// Examine constraint type and operand type and determine a weight value.
17725/// This object must already have been set up with the operand type
17726/// and the current alternative constraint selected.
17727 TargetLowering::ConstraintWeight
17728 PPCTargetLowering::getSingleConstraintMatchWeight(
17729 AsmOperandInfo &info, const char *constraint) const {
17730 ConstraintWeight weight = CW_Invalid;
17731 Value *CallOperandVal = info.CallOperandVal;
17732 // If we don't have a value, we can't do a match,
17733 // but allow it at the lowest weight.
17734 if (!CallOperandVal)
17735 return CW_Default;
17736 Type *type = CallOperandVal->getType();
17737
17738 // Look at the constraint type.
17739 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
17740 return CW_Register; // an individual CR bit.
17741 else if ((StringRef(constraint) == "wa" ||
17742 StringRef(constraint) == "wd" ||
17743 StringRef(constraint) == "wf") &&
17744 type->isVectorTy())
17745 return CW_Register;
17746 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
17747 return CW_Register; // registers that just hold 64-bit integer data.
17748 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17749 return CW_Register;
17750 else if (StringRef(constraint) == "ww" && type->isFloatTy())
17751 return CW_Register;
17752
17753 switch (*constraint) {
17754 default:
17755 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17756 break;
17757 case 'b':
17758 if (type->isIntegerTy())
17759 weight = CW_Register;
17760 break;
17761 case 'f':
17762 if (type->isFloatTy())
17763 weight = CW_Register;
17764 break;
17765 case 'd':
17766 if (type->isDoubleTy())
17767 weight = CW_Register;
17768 break;
17769 case 'v':
17770 if (type->isVectorTy())
17771 weight = CW_Register;
17772 break;
17773 case 'y':
17774 weight = CW_Register;
17775 break;
17776 case 'Z':
17777 weight = CW_Memory;
17778 break;
17779 }
17780 return weight;
17781}
17782
17783std::pair<unsigned, const TargetRegisterClass *>
17784 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17785 StringRef Constraint,
17786 MVT VT) const {
17787 if (Constraint.size() == 1) {
17788 // GCC RS6000 Constraint Letters
17789 switch (Constraint[0]) {
17790 case 'b': // R1-R31
17791 if (VT == MVT::i64 && Subtarget.isPPC64())
17792 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
17793 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
17794 case 'r': // R0-R31
17795 if (VT == MVT::i64 && Subtarget.isPPC64())
17796 return std::make_pair(0U, &PPC::G8RCRegClass);
17797 return std::make_pair(0U, &PPC::GPRCRegClass);
17798 // 'd' and 'f' constraints are both defined to be "the floating point
17799 // registers", where one is for 32-bit and the other for 64-bit. We don't
17800 // really care overly much here so just give them all the same reg classes.
17801 case 'd':
17802 case 'f':
17803 if (Subtarget.hasSPE()) {
17804 if (VT == MVT::f32 || VT == MVT::i32)
17805 return std::make_pair(0U, &PPC::GPRCRegClass);
17806 if (VT == MVT::f64 || VT == MVT::i64)
17807 return std::make_pair(0U, &PPC::SPERCRegClass);
17808 } else {
17809 if (VT == MVT::f32 || VT == MVT::i32)
17810 return std::make_pair(0U, &PPC::F4RCRegClass);
17811 if (VT == MVT::f64 || VT == MVT::i64)
17812 return std::make_pair(0U, &PPC::F8RCRegClass);
17813 }
17814 break;
17815 case 'v':
17816 if (Subtarget.hasAltivec() && VT.isVector())
17817 return std::make_pair(0U, &PPC::VRRCRegClass);
17818 else if (Subtarget.hasVSX())
17819 // Scalars in Altivec registers only make sense with VSX.
17820 return std::make_pair(0U, &PPC::VFRCRegClass);
17821 break;
17822 case 'y': // crrc
17823 return std::make_pair(0U, &PPC::CRRCRegClass);
17824 }
17825 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17826 // An individual CR bit.
17827 return std::make_pair(0U, &PPC::CRBITRCRegClass);
17828 } else if ((Constraint == "wa" || Constraint == "wd" ||
17829 Constraint == "wf" || Constraint == "wi") &&
17830 Subtarget.hasVSX()) {
17831 // A VSX register for either a scalar (FP) or vector. There is no
17832 // support for single precision scalars on subtargets prior to Power8.
17833 if (VT.isVector())
17834 return std::make_pair(0U, &PPC::VSRCRegClass);
17835 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17836 return std::make_pair(0U, &PPC::VSSRCRegClass);
17837 return std::make_pair(0U, &PPC::VSFRCRegClass);
17838 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17839 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17840 return std::make_pair(0U, &PPC::VSSRCRegClass);
17841 else
17842 return std::make_pair(0U, &PPC::VSFRCRegClass);
17843 } else if (Constraint == "lr") {
17844 if (VT == MVT::i64)
17845 return std::make_pair(0U, &PPC::LR8RCRegClass);
17846 else
17847 return std::make_pair(0U, &PPC::LRRCRegClass);
17848 }
17849
17850 // Handle special cases of physical registers that are not properly handled
17851 // by the base class.
17852 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17853 // If we name a VSX register, we can't defer to the base class because it
17854 // will not recognize the correct register (their names will be VSL{0-31}
17855 // and V{0-31} so they won't match). So we match them here.
17856 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
17857 int VSNum = atoi(Constraint.data() + 3);
17858 assert(VSNum >= 0 && VSNum <= 63 &&
17859 "Attempted to access a vsr out of range");
17860 if (VSNum < 32)
17861 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
17862 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
17863 }
17864
17865 // For float registers, we can't defer to the base class as it will match
17866 // the SPILLTOVSRRC class.
17867 if (Constraint.size() > 3 && Constraint[1] == 'f') {
17868 int RegNum = atoi(Constraint.data() + 2);
17869 if (RegNum > 31 || RegNum < 0)
17870 report_fatal_error("Invalid floating point register number");
17871 if (VT == MVT::f32 || VT == MVT::i32)
17872 return Subtarget.hasSPE()
17873 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
17874 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
17875 if (VT == MVT::f64 || VT == MVT::i64)
17876 return Subtarget.hasSPE()
17877 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
17878 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
17879 }
17880 }
17881
17882 std::pair<unsigned, const TargetRegisterClass *> R =
17883 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
17884
17885 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17886 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17887 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17888 // register.
17889 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17890 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
17891 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17892 PPC::GPRCRegClass.contains(R.first))
17893 return std::make_pair(TRI->getMatchingSuperReg(R.first,
17894 PPC::sub_32, &PPC::G8RCRegClass),
17895 &PPC::G8RCRegClass);
17896
17897 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17898 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
17899 R.first = PPC::CR0;
17900 R.second = &PPC::CRRCRegClass;
17901 }
17902 // FIXME: This warning should ideally be emitted in the front end.
17903 const auto &TM = getTargetMachine();
17904 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17905 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17906 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17907 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
17908 errs() << "warning: vector registers 20 to 32 are reserved in the "
17909 "default AIX AltiVec ABI and cannot be used\n";
17910 }
17911
17912 return R;
17913}
17914
17915/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17916/// vector. If it is invalid, don't add anything to Ops.
17917 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
17918 StringRef Constraint,
17919 std::vector<SDValue> &Ops,
17920 SelectionDAG &DAG) const {
17921 SDValue Result;
17922
17923 // Only support length 1 constraints.
17924 if (Constraint.size() > 1)
17925 return;
17926
17927 char Letter = Constraint[0];
17928 switch (Letter) {
17929 default: break;
17930 case 'I':
17931 case 'J':
17932 case 'K':
17933 case 'L':
17934 case 'M':
17935 case 'N':
17936 case 'O':
17937 case 'P': {
17938 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
17939 if (!CST) return; // Must be an immediate to match.
17940 SDLoc dl(Op);
17941 int64_t Value = CST->getSExtValue();
17942 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17943 // numbers are printed as such.
17944 switch (Letter) {
17945 default: llvm_unreachable("Unknown constraint letter!");
17946 case 'I': // "I" is a signed 16-bit constant.
17947 if (isInt<16>(Value))
17948 Result = DAG.getTargetConstant(Value, dl, TCVT);
17949 break;
17950 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17951 if (isShiftedUInt<16, 16>(Value))
17952 Result = DAG.getTargetConstant(Value, dl, TCVT);
17953 break;
17954 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17955 if (isShiftedInt<16, 16>(Value))
17956 Result = DAG.getTargetConstant(Value, dl, TCVT);
17957 break;
17958 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17959 if (isUInt<16>(Value))
17960 Result = DAG.getTargetConstant(Value, dl, TCVT);
17961 break;
17962 case 'M': // "M" is a constant that is greater than 31.
17963 if (Value > 31)
17964 Result = DAG.getTargetConstant(Value, dl, TCVT);
17965 break;
17966 case 'N': // "N" is a positive constant that is an exact power of two.
17967 if (Value > 0 && isPowerOf2_64(Value))
17968 Result = DAG.getTargetConstant(Value, dl, TCVT);
17969 break;
17970 case 'O': // "O" is the constant zero.
17971 if (Value == 0)
17972 Result = DAG.getTargetConstant(Value, dl, TCVT);
17973 break;
17974 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17975 if (isInt<16>(-Value))
17976 Result = DAG.getTargetConstant(Value, dl, TCVT);
17977 break;
17978 }
17979 break;
17980 }
17981 }
17982
17983 if (Result.getNode()) {
17984 Ops.push_back(Result);
17985 return;
17986 }
17987
17988 // Handle standard constraint letters.
17989 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17990}
17991
17994 SelectionDAG &DAG) const {
17995 if (I.getNumOperands() <= 1)
17996 return;
17997 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17998 return;
17999 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18000 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18001 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18002 return;
18003
18004 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18005 Ops.push_back(DAG.getMDNode(MDN));
18006}
18007
18008// isLegalAddressingMode - Return true if the addressing mode represented
18009// by AM is legal for this target, for a load/store of the specified type.
18010 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18011 const AddrMode &AM, Type *Ty,
18012 unsigned AS,
18013 Instruction *I) const {
18014 // Vector type r+i form is supported since Power9 as DQ form. We don't check
18015 // that the offset meets the DQ form requirement (off % 16 == 0), because on
18016 // PowerPC the imm form is preferred and the offset can be adjusted to use the
18017 // imm form later in the PPCLoopInstrFormPrep pass. Also, LSR uses the min and
18018 // max offset of an LSRUse to check for a legal addressing mode, so we should
18019 // be a little aggressive and accept the other offsets for that LSRUse.
18020 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18021 return false;
18022
18023 // PPC allows a sign-extended 16-bit immediate field.
18024 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18025 return false;
18026
18027 // No global is ever allowed as a base.
18028 if (AM.BaseGV)
18029 return false;
18030
18031 // PPC only supports r+r addressing:
18032 switch (AM.Scale) {
18033 case 0: // "r+i" or just "i", depending on HasBaseReg.
18034 break;
18035 case 1:
18036 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18037 return false;
18038 // Otherwise we have r+r or r+i.
18039 break;
18040 case 2:
18041 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18042 return false;
18043 // Allow 2*r as r+r.
18044 break;
18045 default:
18046 // No other scales are supported.
18047 return false;
18048 }
18049
18050 return true;
18051}
18052
18053SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18054 SelectionDAG &DAG) const {
18055 MachineFunction &MF = DAG.getMachineFunction();
18056 MachineFrameInfo &MFI = MF.getFrameInfo();
18057 MFI.setReturnAddressIsTaken(true);
18058
18059 SDLoc dl(Op);
18060 unsigned Depth = Op.getConstantOperandVal(0);
18061
18062 // Make sure the function does not optimize away the store of the RA to
18063 // the stack.
18064 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18065 FuncInfo->setLRStoreRequired();
18066 auto PtrVT = getPointerTy(MF.getDataLayout());
18067
18068 if (Depth > 0) {
18069 // The link register (return address) is saved in the caller's frame
18070 // not the callee's stack frame. So we must get the caller's frame
18071 // address and load the return address at the LR offset from there.
18072 SDValue FrameAddr =
18073 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18074 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
18075 SDValue Offset =
18076 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18077 Subtarget.getScalarIntVT());
18078 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18079 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18080 MachinePointerInfo());
18081 }
18082
18083 // Just load the return address off the stack.
18084 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18085 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18087}
18088
18089SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18090 SelectionDAG &DAG) const {
18091 SDLoc dl(Op);
18092 unsigned Depth = Op.getConstantOperandVal(0);
18093
18094 MachineFunction &MF = DAG.getMachineFunction();
18095 MachineFrameInfo &MFI = MF.getFrameInfo();
18096 MFI.setFrameAddressIsTaken(true);
18097
18098 EVT PtrVT = getPointerTy(MF.getDataLayout());
18099 bool isPPC64 = PtrVT == MVT::i64;
18100
18101 // Naked functions never have a frame pointer, and so we use r1. For all
18102 // other functions, this decision must be delayed until during PEI.
18103 unsigned FrameReg;
18104 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18105 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18106 else
18107 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18108
18109 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18110 PtrVT);
18111 while (Depth--)
18112 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18113 FrameAddr, MachinePointerInfo());
18114 return FrameAddr;
18115}
18116
18117#define GET_REGISTER_MATCHER
18118#include "PPCGenAsmMatcher.inc"
18119
18121 const MachineFunction &MF) const {
18122 bool IsPPC64 = Subtarget.isPPC64();
18123
18124 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18125 if (!Is64Bit && VT != LLT::scalar(32))
18126 report_fatal_error("Invalid register global variable type");
18127
18128 Register Reg = MatchRegisterName(RegName);
18129 if (!Reg)
18130 return Reg;
18131
18132 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18133 // Need followup investigation as to why.
18134 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18135 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18136 StringRef(RegName) + "\"."));
18137
18138 // Convert the GPR to the corresponding GP8R register for 64-bit.
18139 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18140 Reg = Reg.id() - PPC::R0 + PPC::X0;
18141
18142 return Reg;
18143}
18144
18145 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18146 // The 32-bit SVR4 ABI accesses everything as got-indirect.
18147 if (Subtarget.is32BitELFABI())
18148 return true;
18149
18150 // AIX accesses everything indirectly through the TOC, which is similar to
18151 // the GOT.
18152 if (Subtarget.isAIXABI())
18153 return true;
18154
18155 CodeModel::Model CModel = getTargetMachine().getCodeModel();
18156 // Under the small or large code model, module locals are accessed
18157 // indirectly by loading their address from .toc/.got.
18158 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18159 return true;
18160
18161 // JumpTable and BlockAddress are accessed as got-indirect.
18162 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
18163 return true;
18164
18165 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
18166 return Subtarget.isGVIndirectSymbol(G->getGlobal());
18167
18168 return false;
18169}
18170
18171bool
18172 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18173 // The PowerPC target isn't yet aware of offsets.
18174 return false;
18175}
18176
18177 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18178 const CallInst &I,
18179 MachineFunction &MF,
18180 unsigned Intrinsic) const {
18181 switch (Intrinsic) {
18182 case Intrinsic::ppc_atomicrmw_xchg_i128:
18183 case Intrinsic::ppc_atomicrmw_add_i128:
18184 case Intrinsic::ppc_atomicrmw_sub_i128:
18185 case Intrinsic::ppc_atomicrmw_nand_i128:
18186 case Intrinsic::ppc_atomicrmw_and_i128:
18187 case Intrinsic::ppc_atomicrmw_or_i128:
18188 case Intrinsic::ppc_atomicrmw_xor_i128:
18189 case Intrinsic::ppc_cmpxchg_i128:
18191 Info.memVT = MVT::i128;
18192 Info.ptrVal = I.getArgOperand(0);
18193 Info.offset = 0;
18194 Info.align = Align(16);
18197 return true;
18198 case Intrinsic::ppc_atomic_load_i128:
18200 Info.memVT = MVT::i128;
18201 Info.ptrVal = I.getArgOperand(0);
18202 Info.offset = 0;
18203 Info.align = Align(16);
18205 return true;
18206 case Intrinsic::ppc_atomic_store_i128:
18208 Info.memVT = MVT::i128;
18209 Info.ptrVal = I.getArgOperand(2);
18210 Info.offset = 0;
18211 Info.align = Align(16);
18213 return true;
18214 case Intrinsic::ppc_altivec_lvx:
18215 case Intrinsic::ppc_altivec_lvxl:
18216 case Intrinsic::ppc_altivec_lvebx:
18217 case Intrinsic::ppc_altivec_lvehx:
18218 case Intrinsic::ppc_altivec_lvewx:
18219 case Intrinsic::ppc_vsx_lxvd2x:
18220 case Intrinsic::ppc_vsx_lxvw4x:
18221 case Intrinsic::ppc_vsx_lxvd2x_be:
18222 case Intrinsic::ppc_vsx_lxvw4x_be:
18223 case Intrinsic::ppc_vsx_lxvl:
18224 case Intrinsic::ppc_vsx_lxvll: {
18225 EVT VT;
18226 switch (Intrinsic) {
18227 case Intrinsic::ppc_altivec_lvebx:
18228 VT = MVT::i8;
18229 break;
18230 case Intrinsic::ppc_altivec_lvehx:
18231 VT = MVT::i16;
18232 break;
18233 case Intrinsic::ppc_altivec_lvewx:
18234 VT = MVT::i32;
18235 break;
18236 case Intrinsic::ppc_vsx_lxvd2x:
18237 case Intrinsic::ppc_vsx_lxvd2x_be:
18238 VT = MVT::v2f64;
18239 break;
18240 default:
18241 VT = MVT::v4i32;
18242 break;
18243 }
18244
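// The lvx-style intrinsics ignore the low address bits (they load from the
// containing 16-byte-aligned block), so the access is conservatively modeled
// as a (2 * size - 1)-byte window starting size - 1 bytes below the pointer.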
18246 Info.memVT = VT;
18247 Info.ptrVal = I.getArgOperand(0);
18248 Info.offset = -VT.getStoreSize()+1;
18249 Info.size = 2*VT.getStoreSize()-1;
18250 Info.align = Align(1);
18252 return true;
18253 }
18254 case Intrinsic::ppc_altivec_stvx:
18255 case Intrinsic::ppc_altivec_stvxl:
18256 case Intrinsic::ppc_altivec_stvebx:
18257 case Intrinsic::ppc_altivec_stvehx:
18258 case Intrinsic::ppc_altivec_stvewx:
18259 case Intrinsic::ppc_vsx_stxvd2x:
18260 case Intrinsic::ppc_vsx_stxvw4x:
18261 case Intrinsic::ppc_vsx_stxvd2x_be:
18262 case Intrinsic::ppc_vsx_stxvw4x_be:
18263 case Intrinsic::ppc_vsx_stxvl:
18264 case Intrinsic::ppc_vsx_stxvll: {
18265 EVT VT;
18266 switch (Intrinsic) {
18267 case Intrinsic::ppc_altivec_stvebx:
18268 VT = MVT::i8;
18269 break;
18270 case Intrinsic::ppc_altivec_stvehx:
18271 VT = MVT::i16;
18272 break;
18273 case Intrinsic::ppc_altivec_stvewx:
18274 VT = MVT::i32;
18275 break;
18276 case Intrinsic::ppc_vsx_stxvd2x:
18277 case Intrinsic::ppc_vsx_stxvd2x_be:
18278 VT = MVT::v2f64;
18279 break;
18280 default:
18281 VT = MVT::v4i32;
18282 break;
18283 }
18284
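// Same conservative treatment as the load intrinsics above: model the store
// as touching the (2 * size - 1)-byte window around the pointer.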
18286 Info.memVT = VT;
18287 Info.ptrVal = I.getArgOperand(1);
18288 Info.offset = -VT.getStoreSize()+1;
18289 Info.size = 2*VT.getStoreSize()-1;
18290 Info.align = Align(1);
18292 return true;
18293 }
18294 case Intrinsic::ppc_stdcx:
18295 case Intrinsic::ppc_stwcx:
18296 case Intrinsic::ppc_sthcx:
18297 case Intrinsic::ppc_stbcx: {
18298 EVT VT;
18299 auto Alignment = Align(8);
18300 switch (Intrinsic) {
18301 case Intrinsic::ppc_stdcx:
18302 VT = MVT::i64;
18303 break;
18304 case Intrinsic::ppc_stwcx:
18305 VT = MVT::i32;
18306 Alignment = Align(4);
18307 break;
18308 case Intrinsic::ppc_sthcx:
18309 VT = MVT::i16;
18310 Alignment = Align(2);
18311 break;
18312 case Intrinsic::ppc_stbcx:
18313 VT = MVT::i8;
18314 Alignment = Align(1);
18315 break;
18316 }
18318 Info.memVT = VT;
18319 Info.ptrVal = I.getArgOperand(0);
18320 Info.offset = 0;
18321 Info.align = Alignment;
18323 return true;
18324 }
18325 default:
18326 break;
18327 }
18328
18329 return false;
18330}
18331
18332/// It returns EVT::Other if the type should be determined using generic
18333/// target-independent logic.
18335 LLVMContext &Context, const MemOp &Op,
18336 const AttributeList &FuncAttributes) const {
18337 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18338 // We should use Altivec/VSX loads and stores when available. For unaligned
18339 // addresses, unaligned VSX loads are only fast starting with the P8.
18340 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18341 if (Op.isMemset() && Subtarget.hasVSX()) {
18342 uint64_t TailSize = Op.size() % 16;
18343 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
18344 // element if the vector element type matches the tail store type. For tail
18345 // sizes of 3 or 4 bytes the tail store is i32, so use v8i16 instead of v4i32.
18346 if (TailSize > 2 && TailSize <= 4) {
18347 return MVT::v8i16;
18348 }
18349 return MVT::v4i32;
18350 }
18351 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
18352 return MVT::v4i32;
18353 }
18354 }
18355
18356 if (Subtarget.isPPC64()) {
18357 return MVT::i64;
18358 }
18359
18360 return MVT::i32;
18361}
18362
18363/// Returns true if it is beneficial to convert a load of a constant
18364/// to just the constant itself.
18366 Type *Ty) const {
18367 assert(Ty->isIntegerTy());
18368
18369 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18370 return !(BitSize == 0 || BitSize > 64);
18371}
18372
18374 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18375 return false;
18376 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18377 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18378 return NumBits1 == 64 && NumBits2 == 32;
18379}
18380
18382 if (!VT1.isInteger() || !VT2.isInteger())
18383 return false;
18384 unsigned NumBits1 = VT1.getSizeInBits();
18385 unsigned NumBits2 = VT2.getSizeInBits();
18386 return NumBits1 == 64 && NumBits2 == 32;
18387}
18388
18390 // Generally speaking, zexts are not free, but they are free when they can be
18391 // folded with other operations.
18392 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18393 EVT MemVT = LD->getMemoryVT();
18394 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18395 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18396 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18397 LD->getExtensionType() == ISD::ZEXTLOAD))
18398 return true;
18399 }
18400
18401 // FIXME: Add other cases...
18402 // - 32-bit shifts with a zext to i64
18403 // - zext after ctlz, bswap, etc.
18404 // - zext after and by a constant mask
18405
18406 return TargetLowering::isZExtFree(Val, VT2);
18407}
18408
18409bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18410 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18411 "invalid fpext types");
18412 // Extending to float128 is not free.
18413 if (DestVT == MVT::f128)
18414 return false;
18415 return true;
18416}
18417
18419 return isInt<16>(Imm) || isUInt<16>(Imm);
18420}
18421
18423 return isInt<16>(Imm) || isUInt<16>(Imm);
18424}
18425
18428 unsigned *Fast) const {
18429 if (DisablePPCUnaligned)
18430 return false;
18431
18432 // PowerPC supports unaligned memory access for simple non-vector types.
18433 // Although accessing unaligned addresses is not as efficient as accessing
18434 // aligned addresses, it is generally more efficient than manual expansion,
18435 // and generally only traps for software emulation when crossing page
18436 // boundaries.
18437
18438 if (!VT.isSimple())
18439 return false;
18440
18441 if (VT.isFloatingPoint() && !VT.isVector() &&
18442 !Subtarget.allowsUnalignedFPAccess())
18443 return false;
18444
18445 if (VT.getSimpleVT().isVector()) {
18446 if (Subtarget.hasVSX()) {
18447 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18448 VT != MVT::v4f32 && VT != MVT::v4i32)
18449 return false;
18450 } else {
18451 return false;
18452 }
18453 }
18454
18455 if (VT == MVT::ppcf128)
18456 return false;
18457
18458 if (Fast)
18459 *Fast = 1;
18460
18461 return true;
18462}
18463
18465 SDValue C) const {
18466 // Check integral scalar types.
18467 if (!VT.isScalarInteger())
18468 return false;
18469 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18470 if (!ConstNode->getAPIntValue().isSignedIntN(64))
18471 return false;
18472 // This transformation will generate >= 2 operations. But the following
18473 // cases will generate <= 2 instructions during ISEL. So exclude them.
18474 // 1. If the constant multiplier fits in 16 bits, it can be handled by a
18475 // single HW instruction, i.e. MULLI.
18476 // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
18477 // an extra shift is needed compared to case 1, i.e. MULLI plus RLDICR.
18478 int64_t Imm = ConstNode->getSExtValue();
18479 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
18480 Imm >>= Shift;
18481 if (isInt<16>(Imm))
18482 return false;
18483 uint64_t UImm = static_cast<uint64_t>(Imm);
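// A (shifted) multiplier of the form +/-(2^N +/- 1) can be matched as a
// shift plus a single add or subtract, so decomposing the multiply is
// profitable in those cases.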
18484 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
18485 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
18486 return true;
18487 }
18488 return false;
18489}
18490
18492 EVT VT) const {
18495}
18496
18498 Type *Ty) const {
18499 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18500 return false;
18501 switch (Ty->getScalarType()->getTypeID()) {
18502 case Type::FloatTyID:
18503 case Type::DoubleTyID:
18504 return true;
18505 case Type::FP128TyID:
18506 return Subtarget.hasP9Vector();
18507 default:
18508 return false;
18509 }
18510}
18511
18512// FIXME: add more patterns which are not profitable to hoist.
18514 if (!I->hasOneUse())
18515 return true;
18516
18517 Instruction *User = I->user_back();
18518 assert(User && "A single use instruction with no uses.");
18519
18520 switch (I->getOpcode()) {
18521 case Instruction::FMul: {
18522 // Don't break FMA, PowerPC prefers FMA.
18523 if (User->getOpcode() != Instruction::FSub &&
18524 User->getOpcode() != Instruction::FAdd)
18525 return true;
18526
18527 const TargetOptions &Options = getTargetMachine().Options;
18528 const Function *F = I->getFunction();
18529 const DataLayout &DL = F->getDataLayout();
18530 Type *Ty = User->getOperand(0)->getType();
18531
18532 return !(
18533 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18534 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
18535 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
18536 }
18537 case Instruction::Load: {
18538 // Don't break the "store (load float*)" pattern; it will be combined
18539 // to "store (load int32)" by a later InstCombine pass. See function
18540 // combineLoadToOperationType. On PowerPC, loading a floating-point value
18541 // takes more cycles than loading a 32-bit integer.
18542 LoadInst *LI = cast<LoadInst>(I);
18543 // For loads that combineLoadToOperationType leaves alone, such as ordered
18544 // loads, it should be profitable to hoist them.
18545 // For swifterror load, it can only be used for pointer to pointer type, so
18546 // later type check should get rid of this case.
18547 if (!LI->isUnordered())
18548 return true;
18549
18550 if (User->getOpcode() != Instruction::Store)
18551 return true;
18552
18553 if (I->getType()->getTypeID() != Type::FloatTyID)
18554 return true;
18555
18556 return false;
18557 }
18558 default:
18559 return true;
18560 }
18561 return true;
18562}
18563
18564const MCPhysReg *
18566 // LR is a callee-save register, but we must treat it as clobbered by any call
18567 // site. Hence we include LR in the scratch registers, which are in turn added
18568 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18569 // to CTR, which is used by any indirect call.
18570 static const MCPhysReg ScratchRegs[] = {
18571 PPC::X12, PPC::LR8, PPC::CTR8, 0
18572 };
18573
18574 return ScratchRegs;
18575}
18576
18578 const Constant *PersonalityFn) const {
18579 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18580}
18581
18583 const Constant *PersonalityFn) const {
18584 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18585}
18586
18587bool
18589 EVT VT , unsigned DefinedValues) const {
18590 if (VT == MVT::v2i64)
18591 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18592
18593 if (Subtarget.hasVSX())
18594 return true;
18595
18597}
18598
18600 if (DisableILPPref || Subtarget.enableMachineScheduler())
18602
18603 return Sched::ILP;
18604}
18605
18606// Create a fast isel object.
18607FastISel *
18609 const TargetLibraryInfo *LibInfo) const {
18610 return PPC::createFastISel(FuncInfo, LibInfo);
18611}
18612
18613// 'Inverted' means the FMA opcode after negating one multiplicand.
18614// For example, (fma -a b c) = (fnmsub a b c)
18615static unsigned invertFMAOpcode(unsigned Opc) {
18616 switch (Opc) {
18617 default:
18618 llvm_unreachable("Invalid FMA opcode for PowerPC!");
18619 case ISD::FMA:
18620 return PPCISD::FNMSUB;
18621 case PPCISD::FNMSUB:
18622 return ISD::FMA;
18623 }
18624}
18625
18627 bool LegalOps, bool OptForSize,
18629 unsigned Depth) const {
18631 return SDValue();
18632
18633 unsigned Opc = Op.getOpcode();
18634 EVT VT = Op.getValueType();
18635 SDNodeFlags Flags = Op.getNode()->getFlags();
18636
18637 switch (Opc) {
18638 case PPCISD::FNMSUB:
18639 if (!Op.hasOneUse() || !isTypeLegal(VT))
18640 break;
18641
18643 SDValue N0 = Op.getOperand(0);
18644 SDValue N1 = Op.getOperand(1);
18645 SDValue N2 = Op.getOperand(2);
18646 SDLoc Loc(Op);
18647
18649 SDValue NegN2 =
18650 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18651
18652 if (!NegN2)
18653 return SDValue();
18654
18655 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18656 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18657 // These transformations may change the sign of zeroes. For example,
18658 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18659 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18660 // Try to choose the cheaper operand to negate.
18662 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18663 N0Cost, Depth + 1);
18664
18666 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18667 N1Cost, Depth + 1);
18668
18669 if (NegN0 && N0Cost <= N1Cost) {
18670 Cost = std::min(N0Cost, N2Cost);
18671 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18672 } else if (NegN1) {
18673 Cost = std::min(N1Cost, N2Cost);
18674 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18675 }
18676 }
18677
18678 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18679 if (isOperationLegal(ISD::FMA, VT)) {
18680 Cost = N2Cost;
18681 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18682 }
18683
18684 break;
18685 }
18686
18687 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18688 Cost, Depth);
18689}
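
// Editorial sketch (not part of the original file): the FNMSUB rewrites above
// rely on fnmsub(a, b, c) == -(a*b - c). A minimal host-side model of the
// identities, assuming <cassert> (already included in this file); the
// fnmsub() helper below is a hypothetical stand-in for the PPC semantics and
// ignores the single-rounding (fused) behaviour of the real instruction.
#if 0 // illustrative only; not built
static double fnmsub(double A, double B, double C) { return -(A * B - C); }
static void checkFnegFnmsubIdentities() {
  double A = 3.0, B = 5.0, C = 7.0;
  // (fneg (fnmsub a b c)) == (fnmsub a (fneg b) (fneg c)); when a*b - c == 0
  // the two sides may differ in the sign of zero, hence the flag check above.
  assert(-fnmsub(A, B, C) == fnmsub(A, -B, -C));
  // (fneg (fnmsub a b c)) == (fma a b (fneg c)), i.e. a*b - c.
  assert(-fnmsub(A, B, C) == A * B - C);
}
#endif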
18690
18691// Override to enable LOAD_STACK_GUARD lowering on Linux.
18693 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18694 return true;
18696}
18697
18699 bool ForCodeSize) const {
18700 if (!VT.isSimple() || !Subtarget.hasVSX())
18701 return false;
18702
18703 switch(VT.getSimpleVT().SimpleTy) {
18704 default:
18705 // For FP types that are currently not supported by the PPC backend,
18706 // return false. Examples: f16, f80.
18707 return false;
18708 case MVT::f32:
18709 case MVT::f64: {
18710 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18711 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18712 return true;
18713 }
18714 bool IsExact;
18715 APSInt IntResult(16, false);
18716 // The rounding mode doesn't really matter because we only care about floats
18717 // that can be converted to integers exactly.
18718 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18719 // For exact values in the range [-16, 15] we can materialize the float.
18720 if (IsExact && IntResult <= 15 && IntResult >= -16)
18721 return true;
18722 return Imm.isZero();
18723 }
18724 case MVT::ppcf128:
18725 return Imm.isPosZero();
18726 }
18727}
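
// Editorial sketch (not part of the original file): a host-side model of the
// "exact integer in [-16, 15]" test above (the vspltisw immediate range),
// assuming <cassert> is available; isSmallExactInteger is a hypothetical name.
#if 0 // illustrative only; not built
static bool isSmallExactInteger(double Imm) {
  if (!(Imm >= -16.0 && Imm <= 15.0))
    return false;                         // outside the splat-immediate range
  return (double)(long long)Imm == Imm;   // conversion to integer is exact
}
#endif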
18728
18729// For vector shift operation op, fold
18730// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
18732 SelectionDAG &DAG) {
18733 SDValue N0 = N->getOperand(0);
18734 SDValue N1 = N->getOperand(1);
18735 EVT VT = N0.getValueType();
18736 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18737 unsigned Opcode = N->getOpcode();
18738 unsigned TargetOpcode;
18739
18740 switch (Opcode) {
18741 default:
18742 llvm_unreachable("Unexpected shift operation");
18743 case ISD::SHL:
18744 TargetOpcode = PPCISD::SHL;
18745 break;
18746 case ISD::SRL:
18747 TargetOpcode = PPCISD::SRL;
18748 break;
18749 case ISD::SRA:
18750 TargetOpcode = PPCISD::SRA;
18751 break;
18752 }
18753
18754 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
18755 N1->getOpcode() == ISD::AND)
18756 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
18757 if (Mask->getZExtValue() == OpSizeInBits - 1)
18758 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
18759
18760 return SDValue();
18761}
18762
18763SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
18764 DAGCombinerInfo &DCI) const {
18765 EVT VT = N->getValueType(0);
18766 assert(VT.isVector() && "Vector type expected.");
18767
18768 unsigned Opc = N->getOpcode();
18769 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
18770 "Unexpected opcode.");
18771
18772 if (!isOperationLegal(Opc, VT))
18773 return SDValue();
18774
18775 EVT EltTy = VT.getScalarType();
18776 unsigned EltBits = EltTy.getSizeInBits();
18777 if (EltTy != MVT::i64 && EltTy != MVT::i32)
18778 return SDValue();
18779
18780 SDValue N1 = N->getOperand(1);
18781 uint64_t SplatBits = 0;
18782 bool AddSplatCase = false;
18783 unsigned OpcN1 = N1.getOpcode();
18784 if (OpcN1 == PPCISD::VADD_SPLAT &&
18786 AddSplatCase = true;
18787 SplatBits = N1.getConstantOperandVal(0);
18788 }
18789
18790 if (!AddSplatCase) {
18791 if (OpcN1 != ISD::BUILD_VECTOR)
18792 return SDValue();
18793
18794 unsigned SplatBitSize;
18795 bool HasAnyUndefs;
18796 APInt APSplatBits, APSplatUndef;
18797 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
18798 bool BVNIsConstantSplat =
18799 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
18800 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
18801 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
18802 return SDValue();
18803 SplatBits = APSplatBits.getZExtValue();
18804 }
18805
18806 SDLoc DL(N);
18807 SDValue N0 = N->getOperand(0);
18808 // PPC vector shifts by word/doubleword look at only the low 5/6 bits of
18809 // the shift vector, which means the max value is 31/63. A shift vector of
18810 // all 1s will be truncated to 31/63, which is useful because vspltiw is
18811 // limited to the -16 to 15 range.
18812 if (SplatBits == (EltBits - 1)) {
18813 unsigned NewOpc;
18814 switch (Opc) {
18815 case ISD::SHL:
18816 NewOpc = PPCISD::SHL;
18817 break;
18818 case ISD::SRL:
18819 NewOpc = PPCISD::SRL;
18820 break;
18821 case ISD::SRA:
18822 NewOpc = PPCISD::SRA;
18823 break;
18824 }
18825 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
18826 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
18827 }
18828
18829 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
18830 return SDValue();
18831
18832 // For 64-bit elements there is no splat immediate, so we want to catch a
18833 // shift by 1 here before the BUILD_VECTOR is replaced by a load.
18834 if (EltTy != MVT::i64 || SplatBits != 1)
18835 return SDValue();
18836
18837 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
18838}
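
// Editorial sketch (not part of the original file): scalar models of the two
// rewrites above, assuming <cstdint> and <cassert> (both already included).
#if 0 // illustrative only; not built
static void checkVectorShiftRewrites() {
  // An all-ones splat in the shift-amount operand acts as EltBits - 1 because
  // the hardware reads only the low 5 (word) or 6 (doubleword) bits.
  assert((0xFFu & 31u) == 31u && (0xFFu & 63u) == 63u);
  // For 64-bit elements, shifting left by 1 equals adding the value to itself,
  // avoiding a splat-of-1 that would otherwise become a load.
  uint64_t X = 0x0123456789ABCDEFull;
  assert((X << 1) == X + X);
}
#endif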
18839
18840SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
18841 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18842 return Value;
18843
18844 if (N->getValueType(0).isVector())
18845 return combineVectorShift(N, DCI);
18846
18847 SDValue N0 = N->getOperand(0);
18848 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18849 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
18850 N0.getOpcode() != ISD::SIGN_EXTEND ||
18851 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
18852 N->getValueType(0) != MVT::i64)
18853 return SDValue();
18854
18855 // We can't save an operation here if the value is already extended, and
18856 // the existing shift is easier to combine.
18857 SDValue ExtsSrc = N0.getOperand(0);
18858 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
18859 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
18860 return SDValue();
18861
18862 SDLoc DL(N0);
18863 SDValue ShiftBy = SDValue(CN1, 0);
18864 // We want the shift amount to be i32 on the extswsli, but the original
18865 // shift amount could be an i64.
18866 if (ShiftBy.getValueType() == MVT::i64)
18867 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
18868
18869 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
18870 ShiftBy);
18871}
18872
18873SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18874 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18875 return Value;
18876
18877 if (N->getValueType(0).isVector())
18878 return combineVectorShift(N, DCI);
18879
18880 return SDValue();
18881}
18882
18883SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18884 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18885 return Value;
18886
18887 if (N->getValueType(0).isVector())
18888 return combineVectorShift(N, DCI);
18889
18890 return SDValue();
18891}
18892
18893// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18894// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
18895 // When C is zero, the expression (addi Z, -C) can be simplified to Z.
18896 // Requirement: -C is in [-32768, 32767], and X and Z are MVT::i64 types.
18898 const PPCSubtarget &Subtarget) {
18899 if (!Subtarget.isPPC64())
18900 return SDValue();
18901
18902 SDValue LHS = N->getOperand(0);
18903 SDValue RHS = N->getOperand(1);
18904
18905 auto isZextOfCompareWithConstant = [](SDValue Op) {
18906 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18907 Op.getValueType() != MVT::i64)
18908 return false;
18909
18910 SDValue Cmp = Op.getOperand(0);
18911 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18912 Cmp.getOperand(0).getValueType() != MVT::i64)
18913 return false;
18914
18915 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
18916 int64_t NegConstant = 0 - Constant->getSExtValue();
18917 // Due to the limitations of the addi instruction,
18918 // -C is required to be in [-32768, 32767].
18919 return isInt<16>(NegConstant);
18920 }
18921
18922 return false;
18923 };
18924
18925 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18926 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18927
18928 // If there is a pattern, canonicalize a zext operand to the RHS.
18929 if (LHSHasPattern && !RHSHasPattern)
18930 std::swap(LHS, RHS);
18931 else if (!LHSHasPattern && !RHSHasPattern)
18932 return SDValue();
18933
18934 SDLoc DL(N);
18935 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
18936 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
18937 SDValue Cmp = RHS.getOperand(0);
18938 SDValue Z = Cmp.getOperand(0);
18939 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
18940 int64_t NegConstant = 0 - Constant->getSExtValue();
18941
18942 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
18943 default: break;
18944 case ISD::SETNE: {
18945 // when C == 0
18946 // --> addze X, (addic Z, -1).carry
18947 // /
18948 // add X, (zext(setne Z, C))--
18949 // \ when -32768 <= -C <= 32767 && C != 0
18950 // --> addze X, (addic (addi Z, -C), -1).carry
18951 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18952 DAG.getConstant(NegConstant, DL, MVT::i64));
18953 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18954 SDValue Addc =
18955 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
18956 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
18957 DAG.getConstant(0, DL, CarryType));
18958 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
18959 DAG.getConstant(0, DL, MVT::i64),
18960 SDValue(Addc.getNode(), 1));
18961 }
18962 case ISD::SETEQ: {
18963 // when C == 0
18964 // --> addze X, (subfic Z, 0).carry
18965 // /
18966 // add X, (zext(sete Z, C))--
18967 // \ when -32768 <= -C <= 32767 && C != 0
18968 // --> addze X, (subfic (addi Z, -C), 0).carry
18969 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18970 DAG.getConstant(NegConstant, DL, MVT::i64));
18971 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18972 SDValue Subc =
18973 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
18974 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
18975 DAG.getConstant(0, DL, CarryType));
18976 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
18977 DAG.getConstant(1UL, DL, CarryType));
18978 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
18979 DAG.getConstant(0, DL, MVT::i64), Invert);
18980 }
18981 }
18982
18983 return SDValue();
18984}
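
// Editorial sketch (not part of the original file): the carry identities
// behind the rewrite above, modelled with host 64-bit unsigned arithmetic
// (assuming <cstdint> and <cassert>, both already included in this file).
#if 0 // illustrative only; not built
static void checkAddzeCarryIdentities(uint64_t Z, uint64_t C) {
  uint64_t W = Z - C;                  // (addi Z, -C), or just Z when C == 0
  // addic W, -1: the carry-out of W + 0xFFFF...F is set exactly when W != 0,
  // so the following addze adds zext(setne Z, C) to X.
  bool AddicCarry = (W + ~0ull) < W;   // unsigned carry-out of the addition
  assert(AddicCarry == (Z != C));
  // subfic W, 0: the carry (no borrow in 0 - W) is set exactly when W == 0,
  // so the following addze adds zext(seteq Z, C) to X.
  bool SubficCarry = (W == 0);
  assert(SubficCarry == (Z == C));
}
#endif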
18985
18986// Transform
18987// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18988// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18989// In this case both C1 and C2 must be known constants.
18990 // C1+C2 must fit into a 34-bit signed integer.
18992 const PPCSubtarget &Subtarget) {
18993 if (!Subtarget.isUsingPCRelativeCalls())
18994 return SDValue();
18995
18996 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
18997 // If we find that node try to cast the Global Address and the Constant.
18998 SDValue LHS = N->getOperand(0);
18999 SDValue RHS = N->getOperand(1);
19000
19001 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19002 std::swap(LHS, RHS);
19003
19004 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19005 return SDValue();
19006
19007 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19008 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
19009 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
19010
19011 // Check that both casts succeeded.
19012 if (!GSDN || !ConstNode)
19013 return SDValue();
19014
19015 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19016 SDLoc DL(GSDN);
19017
19018 // The signed int offset needs to fit in 34 bits.
19019 if (!isInt<34>(NewOffset))
19020 return SDValue();
19021
19022 // The new global address is a copy of the old global address except
19023 // that it has the updated Offset.
19024 SDValue GA =
19025 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19026 NewOffset, GSDN->getTargetFlags());
19027 SDValue MatPCRel =
19028 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19029 return MatPCRel;
19030}
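
// Editorial sketch (not part of the original file): the folded offset must
// fit in the 34-bit signed displacement used by prefixed, PC-relative
// addressing (e.g. paddi), i.e. in [-2^33, 2^33). A host-side model of that
// check, assuming <cstdint>; fitsPCRelDisp34 is a hypothetical helper name.
#if 0 // illustrative only; not built
static bool fitsPCRelDisp34(int64_t C1, int64_t C2) {
  int64_t NewOffset = C1 + C2;          // GA offset plus the ADD's constant
  return NewOffset >= -(int64_t(1) << 33) && NewOffset < (int64_t(1) << 33);
}
#endif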
19031
19032SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19033 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19034 return Value;
19035
19036 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19037 return Value;
19038
19039 return SDValue();
19040}
19041
19042// Detect TRUNCATE operations on bitcasts of float128 values.
19043 // What we are looking for here is the situation where we extract a subset
19044 // of bits from a 128-bit float.
19045// This can be of two forms:
19046// 1) BITCAST of f128 feeding TRUNCATE
19047// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19048 // This is required because we do not have a legal i128 type, and so we
19049 // want to avoid having to store the f128 and then reload part
19050 // of it.
19051SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19052 DAGCombinerInfo &DCI) const {
19053 // If we are using CRBits then try that first.
19054 if (Subtarget.useCRBits()) {
19055 // Check if CRBits did anything and return that if it did.
19056 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19057 return CRTruncValue;
19058 }
19059
19060 SDLoc dl(N);
19061 SDValue Op0 = N->getOperand(0);
19062
19063 // Looking for a truncate of i128 to i64.
19064 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19065 return SDValue();
19066
19067 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19068
19069 // SRL feeding TRUNCATE.
19070 if (Op0.getOpcode() == ISD::SRL) {
19071 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19072 // The right shift has to be by 64 bits.
19073 if (!ConstNode || ConstNode->getZExtValue() != 64)
19074 return SDValue();
19075
19076 // Switch the element number to extract.
19077 EltToExtract = EltToExtract ? 0 : 1;
19078 // Update Op0 past the SRL.
19079 Op0 = Op0.getOperand(0);
19080 }
19081
19082 // BITCAST feeding a TRUNCATE possibly via SRL.
19083 if (Op0.getOpcode() == ISD::BITCAST &&
19084 Op0.getValueType() == MVT::i128 &&
19085 Op0.getOperand(0).getValueType() == MVT::f128) {
19086 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19087 return DCI.DAG.getNode(
19088 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19089 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19090 }
19091 return SDValue();
19092}
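
// Editorial sketch (not part of the original file): which half of the i128
// each pattern extracts, modelled with the compiler's unsigned __int128
// extension (a host assumption) and <cstdint>/<cassert>.
#if 0 // illustrative only; not built
static void checkF128HalfExtraction(uint64_t Lo, uint64_t Hi) {
  unsigned __int128 X = ((unsigned __int128)Hi << 64) | Lo;
  assert((uint64_t)X == Lo);           // plain TRUNCATE: the low 64 bits
  assert((uint64_t)(X >> 64) == Hi);   // TRUNCATE of (SRL X, 64): the high bits
  // In the v2i64 view of the f128, the low half is element 0 on little-endian
  // targets and element 1 on big-endian targets, hence EltToExtract above.
}
#endif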
19093
19094SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
19095 SelectionDAG &DAG = DCI.DAG;
19096
19097 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
19098 if (!ConstOpOrElement)
19099 return SDValue();
19100
19101 // An imul is usually smaller than the alternative sequence for a legal type.
19103 isOperationLegal(ISD::MUL, N->getValueType(0)))
19104 return SDValue();
19105
19106 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19107 switch (this->Subtarget.getCPUDirective()) {
19108 default:
19109 // TODO: enhance the condition for subtarget before pwr8
19110 return false;
19111 case PPC::DIR_PWR8:
19112 // type mul add shl
19113 // scalar 4 1 1
19114 // vector 7 2 2
19115 return true;
19116 case PPC::DIR_PWR9:
19117 case PPC::DIR_PWR10:
19118 case PPC::DIR_PWR11:
19120 // type mul add shl
19121 // scalar 5 2 2
19122 // vector 7 2 2
19123
19124 // The cycle ratios of the relevant operations are shown in the table
19125 // above. Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl all
19126 // cost 2 for both scalar and vector types, the 2-instruction patterns
19127 // (add/sub + shl, total 4) are always profitable; but the 3-instruction
19128 // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6
19129 // (sub + add + shl), so we should only do it for vector types.
19130 return IsAddOne && IsNeg ? VT.isVector() : true;
19131 }
19132 };
19133
19134 EVT VT = N->getValueType(0);
19135 SDLoc DL(N);
19136
19137 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19138 bool IsNeg = MulAmt.isNegative();
19139 APInt MulAmtAbs = MulAmt.abs();
19140
19141 if ((MulAmtAbs - 1).isPowerOf2()) {
19142 // (mul x, 2^N + 1) => (add (shl x, N), x)
19143 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
19144
19145 if (!IsProfitable(IsNeg, true, VT))
19146 return SDValue();
19147
19148 SDValue Op0 = N->getOperand(0);
19149 SDValue Op1 =
19150 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19151 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
19152 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
19153
19154 if (!IsNeg)
19155 return Res;
19156
19157 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
19158 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19159 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19160 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19161
19162 if (!IsProfitable(IsNeg, false, VT))
19163 return SDValue();
19164
19165 SDValue Op0 = N->getOperand(0);
19166 SDValue Op1 =
19167 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19168 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
19169
19170 if (!IsNeg)
19171 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
19172 else
19173 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
19174
19175 } else {
19176 return SDValue();
19177 }
19178}
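
// Editorial sketch (not part of the original file): the decompositions above,
// checked with wrapping 64-bit arithmetic (assuming <cstdint> and <cassert>).
#if 0 // illustrative only; not built
static void checkMulDecompositions(uint64_t X, unsigned N) {
  uint64_t P2 = uint64_t(1) << N;        // assumes N < 64
  assert(X * (P2 + 1) == (X << N) + X);  // (mul x, 2^N + 1) => (add (shl x, N), x)
  assert(X * (P2 - 1) == (X << N) - X);  // (mul x, 2^N - 1) => (sub (shl x, N), x)
  // The negated variants follow by negating both sides modulo 2^64.
}
#endif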
19179
19180 // Combine an fma-like op (like fnmsub) with fnegs into the appropriate op.
19181 // Do it in the combiner since we must check SD flags and subtarget features.
19182SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19183 DAGCombinerInfo &DCI) const {
19184 SDValue N0 = N->getOperand(0);
19185 SDValue N1 = N->getOperand(1);
19186 SDValue N2 = N->getOperand(2);
19187 SDNodeFlags Flags = N->getFlags();
19188 EVT VT = N->getValueType(0);
19189 SelectionDAG &DAG = DCI.DAG;
19191 unsigned Opc = N->getOpcode();
19193 bool LegalOps = !DCI.isBeforeLegalizeOps();
19194 SDLoc Loc(N);
19195
19196 if (!isOperationLegal(ISD::FMA, VT))
19197 return SDValue();
19198
19199 // Allowing the transformation to FNMSUB may change the sign of zeroes when
19200 // ab-c=0, since (fnmsub a b c)=-0 while c-ab=+0.
19201 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19202 return SDValue();
19203
19204 // (fma (fneg a) b c) => (fnmsub a b c)
19205 // (fnmsub (fneg a) b c) => (fma a b c)
19206 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
19207 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
19208
19209 // (fma a (fneg b) c) => (fnmsub a b c)
19210 // (fnmsub a (fneg b) c) => (fma a b c)
19211 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
19212 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
19213
19214 return SDValue();
19215}
19216
19217bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19218 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
19219 if (!Subtarget.is64BitELFABI())
19220 return false;
19221
19222 // If not a tail call then no need to proceed.
19223 if (!CI->isTailCall())
19224 return false;
19225
19226 // If sibling calls have been disabled and tail-calls aren't guaranteed,
19227 // there is no reason to duplicate.
19228 auto &TM = getTargetMachine();
19229 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19230 return false;
19231
19232 // Can't tail call a function called indirectly, or if it has variadic args.
19233 const Function *Callee = CI->getCalledFunction();
19234 if (!Callee || Callee->isVarArg())
19235 return false;
19236
19237 // Make sure the callee and caller calling conventions are eligible for tco.
19238 const Function *Caller = CI->getParent()->getParent();
19239 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19240 CI->getCallingConv()))
19241 return false;
19242
19243 // If the function is local, then we have a good chance at tail-calling it.
19244 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19245}
19246
19247bool PPCTargetLowering::
19248isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19249 const Value *Mask = AndI.getOperand(1);
19250 // If the mask is suitable for andi. or andis. we should sink the and.
19251 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19252 // Can't handle constants wider than 64 bits.
19253 if (CI->getBitWidth() > 64)
19254 return false;
19255 int64_t ConstVal = CI->getZExtValue();
19256 return isUInt<16>(ConstVal) ||
19257 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19258 }
19259
19260 // For non-constant masks, we can always use the record-form and.
19261 return true;
19262}
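
// Editorial sketch (not part of the original file): the masks accepted above
// are exactly those encodable by a single record-form and-immediate:
// andi. takes a 16-bit unsigned immediate, andis. the same immediate shifted
// left by 16. Host-side model, assuming <cstdint>; fitsAndiOrAndis is a
// hypothetical helper name.
#if 0 // illustrative only; not built
static bool fitsAndiOrAndis(uint64_t Mask) {
  bool FitsAndi = Mask <= 0xFFFFull;                                  // andi.
  bool FitsAndis = (Mask & 0xFFFFull) == 0 && Mask <= 0xFFFF0000ull;  // andis.
  return FitsAndi || FitsAndis;
}
#endif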
19263
19264 /// getAddrModeForFlags - Based on the set of address flags, select the
19265 /// optimal instruction format to match by.
19266PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19267 // This is not a node we should be handling here.
19268 if (Flags == PPC::MOF_None)
19269 return PPC::AM_None;
19270 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19271 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
19272 if ((Flags & FlagSet) == FlagSet)
19273 return PPC::AM_DForm;
19274 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
19275 if ((Flags & FlagSet) == FlagSet)
19276 return PPC::AM_DSForm;
19277 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
19278 if ((Flags & FlagSet) == FlagSet)
19279 return PPC::AM_DQForm;
19280 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
19281 if ((Flags & FlagSet) == FlagSet)
19282 return PPC::AM_PrefixDForm;
19283 // If no other forms are selected, return an X-Form as it is the most
19284 // general addressing mode.
19285 return PPC::AM_XForm;
19286}
19287
19288/// Set alignment flags based on whether or not the Frame Index is aligned.
19289/// Utilized when computing flags for address computation when selecting
19290/// load and store instructions.
19291static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19292 SelectionDAG &DAG) {
19293 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19294 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
19295 if (!FI)
19296 return;
19298 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
19299 // If this is (add $FI, $S16Imm), the alignment flags are already set
19300 // based on the immediate. We just need to clear the alignment flags
19301 // if the FI alignment is weaker.
19302 if ((FrameIndexAlign % 4) != 0)
19303 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19304 if ((FrameIndexAlign % 16) != 0)
19305 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19306 // If the address is a plain FrameIndex, set alignment flags based on
19307 // FI alignment.
19308 if (!IsAdd) {
19309 if ((FrameIndexAlign % 4) == 0)
19310 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19311 if ((FrameIndexAlign % 16) == 0)
19312 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19313 }
19314}
19315
19316/// Given a node, compute flags that are used for address computation when
19317/// selecting load and store instructions. The flags computed are stored in
19318 /// FlagSet. This function takes into account whether the node is a constant,
19319 /// an ADD, an OR, or none of these, and computes the address flags accordingly.
19320static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19321 SelectionDAG &DAG) {
19322 // Set the alignment flags for the node depending on if the node is
19323 // 4-byte or 16-byte aligned.
19324 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19325 if ((Imm & 0x3) == 0)
19326 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19327 if ((Imm & 0xf) == 0)
19328 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19329 };
19330
19331 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
19332 // All 32-bit constants can be computed as LIS + Disp.
19333 const APInt &ConstImm = CN->getAPIntValue();
19334 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
19335 FlagSet |= PPC::MOF_AddrIsSImm32;
19336 SetAlignFlagsForImm(ConstImm.getZExtValue());
19337 setAlignFlagsForFI(N, FlagSet, DAG);
19338 }
19339 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
19340 FlagSet |= PPC::MOF_RPlusSImm34;
19341 else // Let constant materialization handle large constants.
19342 FlagSet |= PPC::MOF_NotAddNorCst;
19343 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19344 // This address can be represented as an addition of:
19345 // - Register + Imm16 (possibly a multiple of 4/16)
19346 // - Register + Imm34
19347 // - Register + PPCISD::Lo
19348 // - Register + Register
19349 // In any case, we won't have to match this as Base + Zero.
19350 SDValue RHS = N.getOperand(1);
19351 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
19352 const APInt &ConstImm = CN->getAPIntValue();
19353 if (ConstImm.isSignedIntN(16)) {
19354 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19355 SetAlignFlagsForImm(ConstImm.getZExtValue());
19356 setAlignFlagsForFI(N, FlagSet, DAG);
19357 }
19358 if (ConstImm.isSignedIntN(34))
19359 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19360 else
19361 FlagSet |= PPC::MOF_RPlusR; // Register.
19362 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
19363 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19364 else
19365 FlagSet |= PPC::MOF_RPlusR;
19366 } else { // The address computation is not a constant or an addition.
19367 setAlignFlagsForFI(N, FlagSet, DAG);
19368 FlagSet |= PPC::MOF_NotAddNorCst;
19369 }
19370}
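
// Editorial sketch (not part of the original file): the Mult4/Mult16 flags
// mirror ISA encoding rules -- a DS-form displacement (e.g. ld/std) must be a
// multiple of 4 and a DQ-form displacement (e.g. lxv/stxv) a multiple of 16.
// Host-side model, assuming <cstdint>; the helper names are hypothetical.
#if 0 // illustrative only; not built
static bool dsFormDispOK(int64_t Imm) { return (Imm & 0x3) == 0; }
static bool dqFormDispOK(int64_t Imm) { return (Imm & 0xF) == 0; }
#endif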
19371
19372static bool isPCRelNode(SDValue N) {
19373 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
19374 isValidPCRelNode<ConstantPoolSDNode>(N) ||
19375 isValidPCRelNode<GlobalAddressSDNode>(N) ||
19376 isValidPCRelNode<JumpTableSDNode>(N) ||
19377 isValidPCRelNode<BlockAddressSDNode>(N));
19378}
19379
19380 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
19381/// the address flags of the load/store instruction that is to be matched.
19382unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19383 SelectionDAG &DAG) const {
19384 unsigned FlagSet = PPC::MOF_None;
19385
19386 // Compute subtarget flags.
19387 if (!Subtarget.hasP9Vector())
19388 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19389 else
19390 FlagSet |= PPC::MOF_SubtargetP9;
19391
19392 if (Subtarget.hasPrefixInstrs())
19393 FlagSet |= PPC::MOF_SubtargetP10;
19394
19395 if (Subtarget.hasSPE())
19396 FlagSet |= PPC::MOF_SubtargetSPE;
19397
19398 // Check if we have a PCRel node and return early.
19399 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19400 return FlagSet;
19401
19402 // If the node is one of the paired load/store intrinsics, compute flags
19403 // for address computation and return early.
19404 unsigned ParentOp = Parent->getOpcode();
19405 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19406 (ParentOp == ISD::INTRINSIC_VOID))) {
19407 unsigned ID = Parent->getConstantOperandVal(1);
19408 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19409 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19410 ? Parent->getOperand(2)
19411 : Parent->getOperand(3);
19412 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
19413 FlagSet |= PPC::MOF_Vector;
19414 return FlagSet;
19415 }
19416 }
19417
19418 // Mark this as something we don't want to handle here if it is an atomic
19419 // or a pre-increment instruction.
19420 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
19421 if (LSB->isIndexed())
19422 return PPC::MOF_None;
19423
19424 // Compute in-memory type flags. This is based on whether there are
19425 // scalars, floats, or vectors.
19426 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
19427 assert(MN && "Parent should be a MemSDNode!");
19428 EVT MemVT = MN->getMemoryVT();
19429 unsigned Size = MemVT.getSizeInBits();
19430 if (MemVT.isScalarInteger()) {
19431 assert(Size <= 128 &&
19432 "Not expecting scalar integers larger than 16 bytes!");
19433 if (Size < 32)
19434 FlagSet |= PPC::MOF_SubWordInt;
19435 else if (Size == 32)
19436 FlagSet |= PPC::MOF_WordInt;
19437 else
19438 FlagSet |= PPC::MOF_DoubleWordInt;
19439 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19440 if (Size == 128)
19441 FlagSet |= PPC::MOF_Vector;
19442 else if (Size == 256) {
19443 assert(Subtarget.pairedVectorMemops() &&
19444 "256-bit vectors are only available when paired vector memops is "
19445 "enabled!");
19446 FlagSet |= PPC::MOF_Vector;
19447 } else
19448 llvm_unreachable("Not expecting illegal vectors!");
19449 } else { // Floating point type: can be scalar, f128 or vector types.
19450 if (Size == 32 || Size == 64)
19451 FlagSet |= PPC::MOF_ScalarFloat;
19452 else if (MemVT == MVT::f128 || MemVT.isVector())
19453 FlagSet |= PPC::MOF_Vector;
19454 else
19455 llvm_unreachable("Not expecting illegal scalar floats!");
19456 }
19457
19458 // Compute flags for address computation.
19459 computeFlagsForAddressComputation(N, FlagSet, DAG);
19460
19461 // Compute type extension flags.
19462 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
19463 switch (LN->getExtensionType()) {
19464 case ISD::SEXTLOAD:
19465 FlagSet |= PPC::MOF_SExt;
19466 break;
19467 case ISD::EXTLOAD:
19468 case ISD::ZEXTLOAD:
19469 FlagSet |= PPC::MOF_ZExt;
19470 break;
19471 case ISD::NON_EXTLOAD:
19472 FlagSet |= PPC::MOF_NoExt;
19473 break;
19474 }
19475 } else
19476 FlagSet |= PPC::MOF_NoExt;
19477
19478 // For integers, no extension is the same as zero extension.
19479 // We set the extension mode to zero extension so we don't have
19480 // to add separate entries in AddrModesMap for loads and stores.
19481 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19482 FlagSet |= PPC::MOF_ZExt;
19483 FlagSet &= ~PPC::MOF_NoExt;
19484 }
19485
19486 // If we don't have prefixed instructions, 34-bit constants should be
19487 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19488 bool IsNonP1034BitConst =
19490 FlagSet) == PPC::MOF_RPlusSImm34;
19491 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19492 IsNonP1034BitConst)
19493 FlagSet |= PPC::MOF_NotAddNorCst;
19494
19495 return FlagSet;
19496}
19497
19498/// SelectForceXFormMode - Given the specified address, force it to be
19499/// represented as an indexed [r+r] operation (an XForm instruction).
19501 SDValue &Base,
19502 SelectionDAG &DAG) const {
19503
19505 int16_t ForceXFormImm = 0;
19506 if (provablyDisjointOr(DAG, N) &&
19507 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
19508 Disp = N.getOperand(0);
19509 Base = N.getOperand(1);
19510 return Mode;
19511 }
19512
19513 // If the address is the result of an add, we will utilize the fact that the
19514 // address calculation includes an implicit add. However, we can reduce
19515 // register pressure if we do not materialize a constant just for use as the
19516 // index register. We only get rid of the add if it is not an add of a
19517 // value and a 16-bit signed constant and both have a single use.
19518 if (N.getOpcode() == ISD::ADD &&
19519 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
19520 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
19521 Disp = N.getOperand(0);
19522 Base = N.getOperand(1);
19523 return Mode;
19524 }
19525
19526 // Otherwise, use R0 as the base register.
19527 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19528 N.getValueType());
19529 Base = N;
19530
19531 return Mode;
19532}
19533
19535 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19536 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19537 EVT ValVT = Val.getValueType();
19538 // If we are splitting a scalar integer into f64 parts (i.e. so they
19539 // can be placed into VFRC registers), we need to zero extend and
19540 // bitcast the values. This will ensure the value is placed into a
19541 // VSR using direct moves or stack operations as needed.
19542 if (PartVT == MVT::f64 &&
19543 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19544 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
19545 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
19546 Parts[0] = Val;
19547 return true;
19548 }
19549 return false;
19550}
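
// Editorial sketch (not part of the original file): the zext + bitcast above
// only reinterprets bits. A host-side model using memcpy; <cstring> is an
// assumption (it is not in this file's include list), <cstdint> is included.
#if 0 // illustrative only; not built
static double modelIntToF64Part(uint32_t V) {
  uint64_t Wide = V;                   // ISD::ZERO_EXTEND to i64
  double D;
  std::memcpy(&D, &Wide, sizeof(D));   // ISD::BITCAST to f64
  return D;
}
#endif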
19551
19552SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19553 SelectionDAG &DAG) const {
19554 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19556 EVT RetVT = Op.getValueType();
19557 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
19558 SDValue Callee =
19559 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
19560 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
19562 for (const SDValue &N : Op->op_values()) {
19563 EVT ArgVT = N.getValueType();
19564 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
19565 TargetLowering::ArgListEntry Entry(N, ArgTy);
19566 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
19567 Entry.IsZExt = !Entry.IsSExt;
19568 Args.push_back(Entry);
19569 }
19570
19571 SDValue InChain = DAG.getEntryNode();
19572 SDValue TCChain = InChain;
19573 const Function &F = DAG.getMachineFunction().getFunction();
19574 bool isTailCall =
19575 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
19576 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
19577 if (isTailCall)
19578 InChain = TCChain;
19579 CLI.setDebugLoc(SDLoc(Op))
19580 .setChain(InChain)
19581 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
19582 .setTailCall(isTailCall)
19583 .setSExtResult(SignExtend)
19584 .setZExtResult(!SignExtend)
19586 return TLI.LowerCallTo(CLI).first;
19587}
19588
19589SDValue PPCTargetLowering::lowerLibCallBasedOnType(
19590 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
19591 SelectionDAG &DAG) const {
19592 if (Op.getValueType() == MVT::f32)
19593 return lowerToLibCall(LibCallFloatName, Op, DAG);
19594
19595 if (Op.getValueType() == MVT::f64)
19596 return lowerToLibCall(LibCallDoubleName, Op, DAG);
19597
19598 return SDValue();
19599}
19600
19601bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
19602 SDNodeFlags Flags = Op.getNode()->getFlags();
19603 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
19604 Flags.hasNoNaNs() && Flags.hasNoInfs();
19605}
19606
19607bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
19608 return Op.getNode()->getFlags().hasApproximateFuncs();
19609}
19610
19611bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
19613}
19614
19615SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
19616 const char *LibCallFloatName,
19617 const char *LibCallDoubleNameFinite,
19618 const char *LibCallFloatNameFinite,
19619 SDValue Op,
19620 SelectionDAG &DAG) const {
19621 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
19622 return SDValue();
19623
19624 if (!isLowringToMASSFiniteSafe(Op))
19625 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
19626 DAG);
19627
19628 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
19629 LibCallDoubleNameFinite, Op, DAG);
19630}
19631
19632SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
19633 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
19634 "__xl_powf_finite", Op, DAG);
19635}
19636
19637SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
19638 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
19639 "__xl_sinf_finite", Op, DAG);
19640}
19641
19642SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
19643 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
19644 "__xl_cosf_finite", Op, DAG);
19645}
19646
19647SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
19648 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
19649 "__xl_logf_finite", Op, DAG);
19650}
19651
19652SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
19653 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
19654 "__xl_log10f_finite", Op, DAG);
19655}
19656
19657SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
19658 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
19659 "__xl_expf_finite", Op, DAG);
19660}
19661
19662// If we happen to match to an aligned D-Form, check if the Frame Index is
19663// adequately aligned. If it is not, reset the mode to match to X-Form.
19664static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
19665 PPC::AddrMode &Mode) {
19666 if (!isa<FrameIndexSDNode>(N))
19667 return;
19668 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
19669 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
19670 Mode = PPC::AM_XForm;
19671}
19672
19673 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
19674/// compute the address flags of the node, get the optimal address mode based
19675/// on the flags, and set the Base and Disp based on the address mode.
19677 SDValue N, SDValue &Disp,
19678 SDValue &Base,
19679 SelectionDAG &DAG,
19680 MaybeAlign Align) const {
19681 SDLoc DL(Parent);
19682
19683 // Compute the address flags.
19684 unsigned Flags = computeMOFlags(Parent, N, DAG);
19685
19686 // Get the optimal address mode based on the Flags.
19687 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
19688
19689 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
19690 // Select an X-Form load if it is not.
19691 setXFormForUnalignedFI(N, Flags, Mode);
19692
19693 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
19694 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
19695 assert(Subtarget.isUsingPCRelativeCalls() &&
19696 "Must be using PC-Relative calls when a valid PC-Relative node is "
19697 "present!");
19698 Mode = PPC::AM_PCRel;
19699 }
19700
19701 // Set Base and Disp accordingly depending on the address mode.
19702 switch (Mode) {
19703 case PPC::AM_DForm:
19704 case PPC::AM_DSForm:
19705 case PPC::AM_DQForm: {
19706 // This is a register plus a 16-bit immediate. The base will be the
19707 // register and the displacement will be the immediate unless it
19708 // isn't sufficiently aligned.
19709 if (Flags & PPC::MOF_RPlusSImm16) {
19710 SDValue Op0 = N.getOperand(0);
19711 SDValue Op1 = N.getOperand(1);
19712 int16_t Imm = Op1->getAsZExtVal();
19713 if (!Align || isAligned(*Align, Imm)) {
19714 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
19715 Base = Op0;
19716 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
19717 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19718 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19719 }
19720 break;
19721 }
19722 }
19723 // This is a register plus the @lo relocation. The base is the register
19724 // and the displacement is the global address.
19725 else if (Flags & PPC::MOF_RPlusLo) {
19726 Disp = N.getOperand(1).getOperand(0); // The global address.
19731 Base = N.getOperand(0);
19732 break;
19733 }
19734 // This is a constant address at most 32 bits. The base will be
19735 // zero or load-immediate-shifted and the displacement will be
19736 // the low 16 bits of the address.
19737 else if (Flags & PPC::MOF_AddrIsSImm32) {
19738 auto *CN = cast<ConstantSDNode>(N);
19739 EVT CNType = CN->getValueType(0);
19740 uint64_t CNImm = CN->getZExtValue();
19741 // If this address fits entirely in a 16-bit sext immediate field, codegen
19742 // this as "d, 0".
19743 int16_t Imm;
19744 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
19745 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
19746 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19747 CNType);
19748 break;
19749 }
19750 // Handle 32-bit sext immediate with LIS + Addr mode.
19751 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
19752 (!Align || isAligned(*Align, CNImm))) {
19753 int32_t Addr = (int32_t)CNImm;
19754 // Otherwise, break this down into LIS + Disp.
19755 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
19756 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
19757 MVT::i32);
19758 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19759 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
19760 break;
19761 }
19762 }
19763 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. The load/store is non-foldable.
19764 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
19765 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
19766 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19767 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19768 } else
19769 Base = N;
19770 break;
19771 }
19772 case PPC::AM_PrefixDForm: {
19773 int64_t Imm34 = 0;
19774 unsigned Opcode = N.getOpcode();
19775 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19776 (isIntS34Immediate(N.getOperand(1), Imm34))) {
19777 // N is an ADD/OR node, and its operand is a 34-bit signed immediate.
19778 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19779 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
19780 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19781 else
19782 Base = N.getOperand(0);
19783 } else if (isIntS34Immediate(N, Imm34)) {
19784 // The address is a 34-bit signed immediate.
19785 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19786 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
19787 }
19788 break;
19789 }
19790 case PPC::AM_PCRel: {
19791 // When selecting PC-Relative instructions, "Base" is not utilized as
19792 // we select the address as [PC+imm].
19793 Disp = N;
19794 break;
19795 }
19796 case PPC::AM_None:
19797 break;
19798 default: { // By default, X-Form is always available to be selected.
19799 // When a frame index is not aligned, we also match by XForm.
19800 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
19801 Base = FI ? N : N.getOperand(1);
19802 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19803 N.getValueType())
19804 : N.getOperand(0);
19805 break;
19806 }
19807 }
19808 return Mode;
19809}
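
// Editorial sketch (not part of the original file): the LIS + Disp split used
// in the AM_DForm case above for 32-bit constant addresses. The displacement
// is the sign-extended low 16 bits, and the LIS value compensates for that
// sign extension. Host-side model, assuming <cstdint> and <cassert>.
#if 0 // illustrative only; not built
static void checkLisDispSplit(int32_t Addr) {
  int16_t Disp = (int16_t)Addr;                 // low 16 bits, sign-extended
  int64_t Lis = ((int64_t)Addr - Disp) >> 16;   // value materialized by lis
  assert((Lis << 16) + Disp == Addr);           // base + displacement
}
#endif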
19810
19812 bool Return,
19813 bool IsVarArg) const {
19814 switch (CC) {
19815 case CallingConv::Cold:
19816 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
19817 default:
19818 return CC_PPC64_ELF;
19819 }
19820}
19821
19823 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
19824}
19825
19828 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19829 if (shouldInlineQuadwordAtomics() && Size == 128)
19831
19832 switch (AI->getOperation()) {
19838 default:
19840 }
19841
19842 llvm_unreachable("unreachable atomicrmw operation");
19843}
19844
19847 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
19848 if (shouldInlineQuadwordAtomics() && Size == 128)
19851}
19852
19853static Intrinsic::ID
19855 switch (BinOp) {
19856 default:
19857 llvm_unreachable("Unexpected AtomicRMW BinOp");
19859 return Intrinsic::ppc_atomicrmw_xchg_i128;
19860 case AtomicRMWInst::Add:
19861 return Intrinsic::ppc_atomicrmw_add_i128;
19862 case AtomicRMWInst::Sub:
19863 return Intrinsic::ppc_atomicrmw_sub_i128;
19864 case AtomicRMWInst::And:
19865 return Intrinsic::ppc_atomicrmw_and_i128;
19866 case AtomicRMWInst::Or:
19867 return Intrinsic::ppc_atomicrmw_or_i128;
19868 case AtomicRMWInst::Xor:
19869 return Intrinsic::ppc_atomicrmw_xor_i128;
19871 return Intrinsic::ppc_atomicrmw_nand_i128;
19872 }
19873}
19874
19876 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19877 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19878 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19879 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19880 Type *ValTy = Incr->getType();
19881 assert(ValTy->getPrimitiveSizeInBits() == 128);
19882 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19883 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
19884 Value *IncrHi =
19885 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
19886 Value *LoHi = Builder.CreateIntrinsic(
19888 {AlignedAddr, IncrLo, IncrHi});
19889 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19890 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19891 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19892 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19893 return Builder.CreateOr(
19894 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19895}
19896
19898 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19899 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19900 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19901 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19902 Type *ValTy = CmpVal->getType();
19903 assert(ValTy->getPrimitiveSizeInBits() == 128);
19904 Function *IntCmpXchg =
19905 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
19906 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19907 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
19908 Value *CmpHi =
19909 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
19910 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
19911 Value *NewHi =
19912 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
19913 emitLeadingFence(Builder, CI, Ord);
19914 Value *LoHi =
19915 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
19916 emitTrailingFence(Builder, CI, Ord);
19917 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19918 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19919 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19920 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19921 return Builder.CreateOr(
19922 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19923}
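
// Editorial sketch (not part of the original file): the i128 <-> two-i64
// split and recombine performed by the two helpers above, modelled with the
// compiler's unsigned __int128 extension (a host assumption), plus
// <cstdint> and <cassert>.
#if 0 // illustrative only; not built
static void checkI128SplitRecombine(unsigned __int128 V) {
  uint64_t Lo = (uint64_t)V;                 // CreateTrunc(V, Int64Ty)
  uint64_t Hi = (uint64_t)(V >> 64);         // CreateTrunc(CreateLShr(V, 64))
  unsigned __int128 R =
      (unsigned __int128)Lo | ((unsigned __int128)Hi << 64);  // zext/shl/or
  assert(R == V);
}
#endif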
19924
19926 return Subtarget.useCRBits();
19927}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition: IVUsers.cpp:48
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
Register const TargetRegisterInfo * TRI
static bool isConstantOrUndef(const SDValue Op)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
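A sketch of how such a size computation typically looks: take the argument's store size (or the ByVal size) and round it up to the pointer size. This is illustrative under those assumptions, not the exact in-tree body.

// Illustrative sketch: reserve at least one pointer-sized, pointer-aligned slot.
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"

static unsigned CalculateStackSlotSizeSketch(llvm::EVT ArgVT,
                                             llvm::ISD::ArgFlagsTy Flags,
                                             unsigned PtrByteSize) {
  unsigned ArgSize = ArgVT.getStoreSize().getFixedValue();
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();
  // Round up to a multiple of the pointer size so slots stay aligned.
  ArgSize = ((ArgSize + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
  return ArgSize;
}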
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:6057
bool isDenormal() const
Definition: APFloat.h:1450
APInt bitcastToAPInt() const
Definition: APFloat.h:1353
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1406
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
APInt abs() const
Get the absolute value.
Definition: APInt.h:1795
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1396
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1722
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:482
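A small self-contained sketch exercising the APInt operations listed above (getLowBitsSet, setBit, extractBits, zext, getZExtValue); the values are arbitrary examples.

#include "llvm/ADT/APInt.h"
#include <cassert>

void apintSketch() {
  using llvm::APInt;
  APInt Mask = APInt::getLowBitsSet(64, 16); // 0x000000000000FFFF
  Mask.setBit(32);                           // now 0x00000001'0000FFFF
  assert(!Mask.isPowerOf2());
  APInt Low = Mask.extractBits(16, 0);       // the bottom 16 bits
  APInt Wide = Low.zext(128);                // zero-extend to 128 bits
  (void)Wide;
  assert(Mask.getZExtValue() == 0x10000FFFFULL);
}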
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:24
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:506
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:721
@ Add
*p = old + v
Definition: Instructions.h:725
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:777
@ Or
*p = old | v
Definition: Instructions.h:733
@ Sub
*p = old - v
Definition: Instructions.h:727
@ And
*p = old & v
Definition: Instructions.h:729
@ Xor
*p = old ^ v
Definition: Instructions.h:735
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:781
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:769
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:773
@ Nand
*p = ~(old & v)
Definition: Instructions.h:731
BinOp getOperation() const
Definition: Instructions.h:819
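A sketch of a BinOp-to-intrinsic mapping in the spirit of getIntrinsicForAtomicRMWBinOp128. The ppc_atomicrmw_*_i128 intrinsic IDs are assumed from the PowerPC quadword-atomic support and the exact set handled in-tree may differ.

// Sketch only; intrinsic IDs are assumptions, not a statement of the in-tree mapping.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/Support/ErrorHandling.h"

static llvm::Intrinsic::ID mapRMW128Sketch(llvm::AtomicRMWInst::BinOp Op) {
  using namespace llvm;
  switch (Op) {
  case AtomicRMWInst::Add:  return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:  return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:  return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:   return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:  return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand: return Intrinsic::ppc_atomicrmw_nand_i128;
  default:
    llvm_unreachable("unexpected atomicrmw operation for 128-bit lowering");
  }
}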
This is an SDNode representing atomic operations.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:400
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:899
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
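A generic sketch of how CCState and CCValAssign are consumed when lowering formal arguments: analyze the incoming arguments with an assignment function, then branch on register versus memory locations. The assignment function is a placeholder parameter, not a specific PPC routine.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/IR/CallingConv.h"

void analyzeFormalsSketch(llvm::MachineFunction &MF, llvm::CallingConv::ID CC,
                          bool IsVarArg,
                          const llvm::SmallVectorImpl<llvm::ISD::InputArg> &Ins,
                          llvm::CCAssignFn *AssignFn, llvm::LLVMContext &Ctx) {
  using namespace llvm;
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, MF, ArgLocs, Ctx);
  CCInfo.AnalyzeFormalArguments(Ins, AssignFn);

  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // Argument arrives in a physical register.
      Register Reg = VA.getLocReg();
      (void)Reg;
    } else {
      // Argument lives on the stack at this offset from the incoming SP.
      int64_t Offset = VA.getLocMemOffset();
      (void)Offset;
    }
  }
}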
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1348
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1911
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1406
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1267
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1340
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1273
unsigned arg_size() const
Definition: InstrTypes.h:1290
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:198
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:872
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:850
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:842
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
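A minimal sketch of the DataLayout queries listed above, applied to an i64 type chosen purely for illustration.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

void dataLayoutSketch(const llvm::DataLayout &DL, llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Type *I64 = Type::getInt64Ty(Ctx);
  uint64_t AllocBytes = DL.getTypeAllocSize(I64); // size incl. padding
  Align ABIAlign = DL.getABITypeAlign(I64);       // minimum ABI alignment
  bool LE = DL.isLittleEndian();
  IntegerType *IntPtrTy = DL.getIntPtrType(Ctx);  // pointer-sized integer
  (void)AllocBytes; (void)ABIAlign; (void)LE; (void)IntPtrTy;
}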
A debug info location.
Definition: DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:165
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:214
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:706
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:363
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
arg_iterator arg_begin()
Definition: Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:359
size_t arg_size() const
Definition: Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:214
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:623
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:265
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:269
bool hasHiddenVisibility() const
Definition: GlobalValue.h:252
LLVM_ABI StringRef getSection() const
Definition: Globals.cpp:191
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:638
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:132
bool hasComdat() const
Definition: GlobalValue.h:243
Type * getValueType() const
Definition: GlobalValue.h:298
bool hasProtectedVisibility() const
Definition: GlobalValue.h:253
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2618
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1513
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:562
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:201
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:522
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1492
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2082
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2508
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1599
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2230
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
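A sketch of the IRBuilder calls above in the shift-and-truncate style the masked-atomic helpers suggest: shift the interesting lanes down, then narrow. Purely illustrative; the helper name and argument names are not from this file.

#include "llvm/IR/IRBuilder.h"

llvm::Value *extractSubwordSketch(llvm::IRBuilderBase &Builder,
                                  llvm::Value *WideVal, llvm::Value *ShiftAmt,
                                  llvm::Type *NarrowTy) {
  // Shift the interesting bits down to position 0, then truncate to the
  // narrow type the caller actually wants.
  llvm::Value *Shifted = Builder.CreateLShr(WideVal, ShiftAmt, "shifted");
  return Builder.CreateTrunc(Shifted, NarrowTy, "narrow");
}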
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:43
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:180
bool isUnordered() const
Definition: Instructions.h:253
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:214
Metadata node.
Definition: Metadata.h:1077
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
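A sketch of iterating the MVT enumerations above, as target lowering setup code commonly does; the loop body only queries type properties.

#include "llvm/CodeGenTypes/MachineValueType.h"

void mvtSketch() {
  using namespace llvm;
  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    if (!VT.isInteger())
      continue;
    unsigned NumElts = VT.getVectorNumElements();
    uint64_t EltBits = VT.getScalarSizeInBits();
    (void)NumElts; (void)EltBits;
  }
}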
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
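A sketch of the MachineInstrBuilder chaining pattern used inside custom inserters: BuildMI followed by addReg/addImm. The opcode and registers are parameters here, so nothing target-specific is assumed.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

void emitAddImmSketch(llvm::MachineBasicBlock &MBB,
                      llvm::MachineBasicBlock::iterator MI,
                      const llvm::TargetInstrInfo *TII, unsigned Opcode,
                      llvm::Register Dst, llvm::Register Src, int64_t Imm,
                      const llvm::DebugLoc &DL) {
  using namespace llvm;
  // Dst = op(Src, Imm), built one operand at a time.
  BuildMI(MBB, MI, DL, TII->get(Opcode), Dst)
      .addReg(Src)
      .addImm(Imm);
}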
Representation of each machine instruction.
Definition: MachineInstr.h:72
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
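A sketch of allocating a MachineMemOperand with the overload shown above, for a 4-byte load from a fixed stack slot; the sizes and flags are illustrative values.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGenTypes/LowLevelType.h"

llvm::MachineMemOperand *makeLoadMMOSketch(llvm::MachineFunction &MF,
                                           int FrameIndex) {
  using namespace llvm;
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(MF, FrameIndex);
  // Describe a 4-byte scalar load with natural alignment.
  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                 LLT::scalar(32), Align(4));
}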
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:410
bool is32BitELFABI() const
Definition: PPCSubtarget.h:224
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:267
MVT getScalarIntVT() const
Definition: PPCSubtarget.h:254
bool isAIXABI() const
Definition: PPCSubtarget.h:219
bool useSoftFloat() const
Definition: PPCSubtarget.h:179
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:147
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:207
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:261
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:279
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:150
bool isSVR4ABI() const
Definition: PPCSubtarget.h:220
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:139
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:211
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
bool isTargetLinux() const
Definition: PPCSubtarget.h:217
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:285
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:297
bool is64BitELFABI() const
Definition: PPCSubtarget.h:223
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:160
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:303
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:157
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:273
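A sketch of the subtarget queries above driving simple decisions: pointer width from isPPC64 and a note on descriptor-based ABIs. The choices made are illustrative, not the in-tree policy.

#include "PPCSubtarget.h"
#include "llvm/CodeGenTypes/MachineValueType.h"

llvm::MVT pickPointerVTSketch(const llvm::PPCSubtarget &ST) {
  using namespace llvm;
  // 64-bit pointer mode uses i64 GPRs; 32-bit mode uses i32.
  MVT PtrVT = ST.isPPC64() ? MVT::i64 : MVT::i32;
  // Descriptor-based ABIs (64-bit ELFv1, AIX) call through function
  // descriptors rather than direct entry points.
  bool UsesDescriptors = ST.usesFunctionDescriptors();
  (void)UsesDescriptors;
  return PtrVT;
}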
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:720
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
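A sketch of inspecting an SDValue with the accessors above, in the style of a DAG-combine helper; the particular pattern matched (add of a constant) is only an example.

#include "llvm/CodeGen/SelectionDAGNodes.h"

bool isAddOfConstantSketch(llvm::SDValue V, uint64_t &ImmOut) {
  using namespace llvm;
  if (V.getOpcode() != ISD::ADD || V.getNumOperands() != 2)
    return false;
  SDValue RHS = V.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
    ImmOut = C->getZExtValue();
    return true;
  }
  return false;
}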
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:500
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:813
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:459
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:839
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:763
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:719
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:808
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:885
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:777
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
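The SelectionDAG helpers listed above are the building blocks a custom lowering hook combines to create new nodes. The following is a minimal, hedged sketch of that pattern; the helper name, opcode choice, and operand shapes are illustrative assumptions rather than code from this file.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Build "Op + 1" with the same value type as Op. getNode gets-or-creates the
// node, so an identical ADD already present in the DAG is reused.
static SDValue buildAddOne(SelectionDAG &DAG, SDValue Op) {
  SDLoc DL(Op);                              // carries debug-location info
  EVT VT = Op.getValueType();
  SDValue One = DAG.getConstant(1, DL, VT);  // wraps 1 in a ConstantSDNode
  return DAG.getNode(ISD::ADD, DL, VT, Op, One);
}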
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
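As a quick illustration of the two ADT containers above, the hypothetical helper below (an assumption, not part of this file) uses SmallVector's inline storage for a small worklist and SmallPtrSet for cheap pointer membership tests.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// Count distinct pointers; up to 8 entries stay in the set's inline storage,
// so the common small case performs no heap allocation.
static unsigned countUnique(const SmallVectorImpl<int *> &Worklist) {
  SmallPtrSet<int *, 8> Seen;
  unsigned Unique = 0;
  for (int *P : Worklist)
    if (Seen.insert(P).second)   // .second is true iff P was newly inserted
      ++Unique;
  return Unique;
}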
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:34
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:148
Class to represent struct types.
Definition: DerivedTypes.h:218
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
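The configuration hooks above (setOperationAction, addRegisterClass, setTruncStoreAction, and friends) are normally called once from a target's TargetLowering constructor to describe what the hardware supports. The sketch below shows that shape for a made-up target; every type, action, and alignment chosen here is an illustrative assumption, not the PPC backend's actual configuration.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Alignment.h"

namespace {
using namespace llvm;

// Hypothetical backend: the setters are protected, so they are invoked from
// the derived class constructor.
class ExampleTargetLowering : public TargetLowering {
public:
  ExampleTargetLowering(const TargetMachine &TM,
                        const TargetRegisterClass *GPRClass,
                        const TargetRegisterInfo *TRI)
      : TargetLowering(TM) {
    // Map a value type onto a register class before computing properties.
    addRegisterClass(MVT::i32, GPRClass);

    // Expand operations the hardware lacks; mark others for custom lowering.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);

    // Truncating stores and extending loads are controlled per type pair.
    setTruncStoreAction(MVT::i32, MVT::i1, Expand);
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);

    // Alignment and scheduling preferences.
    setMinFunctionAlignment(Align(4));
    setSchedulingPreference(Sched::ILP);

    // Must run once after all register classes have been added.
    computeRegisterProperties(TRI);
  }
};
} // end anonymous namespace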
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:311
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
int getNumOccurrences() const
Definition: CommandLine.h:400
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:256
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1236
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1232
@ TargetConstantPool
Definition: ISDOpcodes.h:184
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1265
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1351
@ STRICT_FCEIL
Definition: ISDOpcodes.h:454
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ GlobalAddress
Definition: ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:275
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:505
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:985
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:249
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1309
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:1018
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:431
@ GlobalTLSAddress
Definition: ISDOpcodes.h:89
@ FrameIndex
Definition: ISDOpcodes.h:90
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:957
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ TargetExternalSymbol
Definition: ISDOpcodes.h:185
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1157
@ TargetJumpTable
Definition: ISDOpcodes.h:183
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1331
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1090
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:1002
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1187
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:347
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1166
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1347
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:242
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1261
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:180
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:458
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1075
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:452
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:453
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1358
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1151
@ ConstantPool
Definition: ISDOpcodes.h:92
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:718
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:960
@ STRICT_FROUND
Definition: ISDOpcodes.h:456
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:477
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:455
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1207
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:908
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1292
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1318
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:941
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1204
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1256
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1180
@ BlockAddress
Definition: ISDOpcodes.h:94
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:360
@ AssertZext
Definition: ISDOpcodes.h:63
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1250
@ STRICT_FRINT
Definition: ISDOpcodes.h:450
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1439
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:713
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1315
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:181
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:543
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1724
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1640
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1691
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1671
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1730
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
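These ISD node predicates are the usual guards before a DAG combine or custom lowering rewrites a memory operation. A hedged sketch of that use follows; the function and the i32/volatility checks are illustrative assumptions.

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Accept only a plain 32-bit load: non-extending and unindexed (isNormalLoad)
// and not volatile, so it is safe to fold into a different addressing form.
static bool isSimpleWordLoad(SDValue Op) {
  if (!ISD::isNormalLoad(Op.getNode()))
    return false;
  auto *LD = cast<LoadSDNode>(Op);
  return LD->getMemoryVT() == MVT::i32 && !LD->isVolatile();
}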
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:134
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ SETBC
SETBC - The ISA 3.1 (P10) SETBC instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ SETBCR
SETBCR - The ISA 3.1 (P10) SETBCR instruction.
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ VSRQ
VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of two consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ ADDC
These nodes represent PPC arithmetic operations with carry.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:65
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
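The PPC:: predicates above classify VECTOR_SHUFFLE masks so the lowering code can pick a matching Altivec/VSX instruction. The sketch below is a hedged illustration of querying them; the 4-byte element size and the helper itself are assumptions for illustration.

#include "PPCISelLowering.h"   // declares the PPC:: shuffle-mask helpers
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Return true (and the splat lane) if the shuffle is a single-word splat that
// a vspltw/xxspltw-style instruction could implement.
static bool canUseWordSplat(SDValue Shuffle, unsigned &SplatIdx,
                            SelectionDAG &DAG) {
  auto *SVN = cast<ShuffleVectorSDNode>(Shuffle);
  if (!PPC::isSplatShuffleMask(SVN, /*EltSize=*/4))
    return false;
  SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVN, 4, DAG);
  return true;
}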
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:106
@ XTY_ER
External reference.
Definition: XCOFF.h:242
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
constexpr double e
Definition: MathExtras.h:47
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:157
unsigned M1(unsigned Val)
Definition: VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:159
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:164
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:126
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition: VE.h:376
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:559
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:577
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:280
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
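Several of the free functions above are small predicates used while matching addressing patterns. The fragment below is a hedged sketch (the helper and the fold it performs are assumptions): it uses isIntS16Immediate to split an ISD::ADD into a base register plus a 16-bit signed displacement, the shape a D-form PPC memory access wants.

#include "PPCISelLowering.h"   // assumed to declare llvm::isIntS16Immediate
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

static bool matchBasePlusImm16(SDValue Addr, SDValue &Base, int16_t &Imm) {
  if (Addr.getOpcode() != ISD::ADD)
    return false;
  // isIntS16Immediate succeeds only when the operand is a constant that fits
  // in a signed 16-bit immediate, returning that value through Imm.
  if (!isIntS16Immediate(Addr.getOperand(1).getNode(), Imm))
    return false;
  Base = Addr.getOperand(0);
  return true;
}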
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:269
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:308
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
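A small sketch combining getIntegerVT and getVectorVT to build a v4i32 EVT and query it; Ctx and the function name are assumptions, not names from this file:
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
static llvm::EVT makeV4I32(llvm::LLVMContext &Ctx) {
  llvm::EVT EltVT = llvm::EVT::getIntegerVT(Ctx, 32);      // i32
  llvm::EVT VecVT = llvm::EVT::getVectorVT(Ctx, EltVT, 4); // v4i32
  assert(VecVT.isVector() && VecVT.getVectorNumElements() == 4);
  assert(VecVT.getFixedSizeInBits() == 128);
  return VecVT;
}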
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing (from the perspective of the caller) return value virtual register.
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:60
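A hedged sketch of the usual pattern with these accessors: only read the concrete value once every bit is known. The function and parameter names are placeholders:
#include "llvm/Support/KnownBits.h"
#include <optional>
// If value analysis proved every bit of the operand, return it as an APInt.
static std::optional<llvm::APInt> tryFoldToConstant(const llvm::KnownBits &Known) {
  if (Known.isConstant())
    return Known.getConstant();
  return std::nullopt;
}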
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR-level reference.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
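A short sketch of how these factories are typically combined when describing stack accesses; MF, FrameIdx, and the helper name are assumptions supplied by the caller, not code from this file:
#include "llvm/CodeGen/MachineMemOperand.h"
#include <utility>
// Describe a frame-index slot and the second 4-byte word within it.
static std::pair<llvm::MachinePointerInfo, llvm::MachinePointerInfo>
describeSlot(llvm::MachineFunction &MF, int FrameIdx) {
  llvm::MachinePointerInfo Lo =
      llvm::MachinePointerInfo::getFixedStack(MF, FrameIdx);
  llvm::MachinePointerInfo Hi = Lo.getWithOffset(4);
  return {Lo, Hi};
}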
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call lowering.
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
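A hedged sketch of how these setters chain together when emitting a library call from target lowering; the callee, result type, argument list, and surrounding variables are placeholders, not the ones this file builds:
// Inside some LowerXXX routine of a TargetLowering subclass, with DAG, dl,
// Chain, Callee, RetTy, and Args assumed to be in scope:
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
    .setSExtResult();
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// CallResult.first is the call's return value, CallResult.second the chain.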
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)