PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/APSInt.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/ADT/StringRef.h"
57#include "llvm/IR/CallingConv.h"
58#include "llvm/IR/Constant.h"
59#include "llvm/IR/Constants.h"
60#include "llvm/IR/DataLayout.h"
61#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/Function.h"
64#include "llvm/IR/GlobalValue.h"
65#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Intrinsics.h"
68#include "llvm/IR/IntrinsicsPowerPC.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/Value.h"
73#include "llvm/MC/MCContext.h"
74#include "llvm/MC/MCExpr.h"
83#include "llvm/Support/Debug.h"
85#include "llvm/Support/Format.h"
91#include <algorithm>
92#include <cassert>
93#include <cstdint>
94#include <iterator>
95#include <list>
96#include <optional>
97#include <utility>
98#include <vector>
99
100using namespace llvm;
101
102#define DEBUG_TYPE "ppc-lowering"
103
105 "disable-p10-store-forward",
106 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
107 cl::init(false));
108
109static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
110cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
113cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
116cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
117
118static cl::opt<bool> DisableSCO("disable-ppc-sco",
119cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
120
121static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
122cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
123
124static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
125cl::desc("use absolute jump tables on ppc"), cl::Hidden);
126
127static cl::opt<bool>
128 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
129 cl::desc("disable vector permute decomposition"),
130 cl::init(true), cl::Hidden);
131
133 "disable-auto-paired-vec-st",
134 cl::desc("disable automatically generated 32byte paired vector stores"),
135 cl::init(true), cl::Hidden);
136
138 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
139 cl::desc("Set minimum number of entries to use a jump table on PPC"));
140
142 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
143 cl::desc("max depth when checking alias info in GatherAllAliases()"));
144
146 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
147 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
148 "function to use initial-exec"));
149
150STATISTIC(NumTailCalls, "Number of tail calls");
151STATISTIC(NumSiblingCalls, "Number of sibling calls");
152STATISTIC(ShufflesHandledWithVPERM,
153 "Number of shuffles lowered to a VPERM or XXPERM");
154STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
155
156static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
157
158static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
159
160// A faster local-[exec|dynamic] TLS access sequence (enabled with the
161// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
162// variables; consistent with the IBM XL compiler, we apply a max size of
163// slightly under 32KB.
165
166// FIXME: Remove this once the bug has been fixed!
167extern cl::opt<bool> ANDIGlueBug;
168
169PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
170 const PPCSubtarget &STI)
171 : TargetLowering(TM), Subtarget(STI) {
172 // Initialize map that relates the PPC addressing modes to the computed flags
173 // of a load/store instruction. The map is used to determine the optimal
174 // addressing mode when selecting loads and stores.
175 initializeAddrModeMap();
176 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
177 // arguments are at least 4/8 bytes aligned.
178 bool isPPC64 = Subtarget.isPPC64();
179 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
180 const MVT RegVT = Subtarget.getScalarIntVT();
181
182 // Set up the register classes.
183 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
184 if (!useSoftFloat()) {
185 if (hasSPE()) {
186 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
187 // EFPU2 APU only supports f32
188 if (!Subtarget.hasEFPU2())
189 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
190 } else {
191 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
192 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
193 }
194 }
195
198
199 // PowerPC uses addo_carry,subo_carry to propagate carry.
202
203 // On P10, the default lowering generates better code using the
204 // setbc instruction.
205 if (!Subtarget.hasP10Vector()) {
207 if (isPPC64)
209 }
210
214 // Match BITREVERSE to a customized fast code sequence in the td file.
214
215 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
217
218 // Custom lower inline assembly to check for special registers.
221
222 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
223 for (MVT VT : MVT::integer_valuetypes()) {
226 }
227
228 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
230
231 if (Subtarget.isISA3_0()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
235 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
236 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
237 } else {
238 // No extending loads from f16 or HW conversions back and forth.
239 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
241 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
244 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
247 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
248 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
249 }
250
251 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
252
253 // PowerPC has pre-inc loads and stores.
264 if (!Subtarget.hasSPE()) {
269 }
270
271 if (Subtarget.useCRBits()) {
273
274 if (isPPC64 || Subtarget.hasFPCVT()) {
279
281 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
283 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
284
289
291 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
293 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
294 } else {
299 }
300
301 // PowerPC does not support direct load/store of condition registers.
304
305 // FIXME: Remove this once the ANDI glue bug is fixed:
306 if (ANDIGlueBug)
308
309 for (MVT VT : MVT::integer_valuetypes()) {
312 setTruncStoreAction(VT, MVT::i1, Expand);
313 }
314
315 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
316 }
317
318 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
319 // PPC (the libcall is not available).
324
325 // We do not currently implement these libm ops for PowerPC.
326 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
327 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
328 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
329 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
331 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
332
333 // PowerPC has no SREM/UREM instructions unless we are on P9
334 // On P9 we may use a hardware instruction to compute the remainder.
335 // When the result of both the remainder and the division is required it is
336 // more efficient to compute the remainder from the result of the division
337 // rather than use the remainder instruction. The instructions are legalized
338 // directly because the DivRemPairsPass performs the transformation at the IR
339 // level.
340 if (Subtarget.isISA3_0()) {
345 } else {
350 }
351
352 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
361
362 // Handle constrained floating-point operations on scalars.
363 // TODO: Handle SPE-specific operations.
369
374
375 if (!Subtarget.hasSPE()) {
378 }
379
380 if (Subtarget.hasVSX()) {
383 }
384
385 if (Subtarget.hasFSQRT()) {
388 }
389
390 if (Subtarget.hasFPRND()) {
395
400 }
401
402 // We don't support sin/cos/sqrt/fmod/pow
413
414 // MASS transformation for LLVM intrinsics with replicating fast-math flags,
415 // to be consistent with the PPCGenScalarMASSEntries pass.
416 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
429 }
430
431 if (Subtarget.hasSPE()) {
434 } else {
435 setOperationAction(ISD::FMA , MVT::f64, Legal);
436 setOperationAction(ISD::FMA , MVT::f32, Legal);
439 }
440
441 if (Subtarget.hasSPE())
442 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
443
444 // If we're enabling GP optimizations, use hardware square root
445 if (!Subtarget.hasFSQRT() &&
446 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
447 Subtarget.hasFRE()))
449
450 if (!Subtarget.hasFSQRT() &&
451 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
452 Subtarget.hasFRES()))
454
455 if (Subtarget.hasFCPSGN()) {
458 } else {
461 }
462
463 if (Subtarget.hasFPRND()) {
468
473 }
474
475 // Prior to P10, PowerPC has no scalar BSWAP, but we can use the vector BSWAP
476 // instruction xxbrd to speed up scalar BSWAP64.
477 if (Subtarget.isISA3_1()) {
480 } else {
482 setOperationAction(ISD::BSWAP, MVT::i64,
483 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
484 }
485
486 // CTPOP or CTTZ were introduced in P8/P9 respectively
487 if (Subtarget.isISA3_0()) {
488 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
489 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
490 } else {
491 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
492 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
493 }
494
495 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
498 } else {
501 }
502
503 // PowerPC does not have ROTR
506
507 if (!Subtarget.useCRBits()) {
508 // PowerPC does not have Select
513 }
514
515 // PowerPC wants to turn select_cc of FP into fsel when possible.
518
519 // PowerPC wants to optimize integer setcc a bit
520 if (!Subtarget.useCRBits())
522
523 if (Subtarget.hasFPU()) {
527
531 }
532
533 // PowerPC does not have BRCOND which requires SetCC
534 if (!Subtarget.useCRBits())
536
538
539 if (Subtarget.hasSPE()) {
540 // SPE has built-in conversions
547
548 // SPE supports signaling compare of f32/f64.
551 } else {
552 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
555
556 // PowerPC does not have [U|S]INT_TO_FP
561 }
562
563 if (Subtarget.hasDirectMove() && isPPC64) {
568 if (TM.Options.UnsafeFPMath) {
577 }
578 } else {
583 }
584
585 // We cannot sextinreg(i1). Expand to shifts.
587
588 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
589 // SjLj exception handling, but rather a light-weight setjmp/longjmp
590 // replacement to support continuations, user-level threading, etc. As a
591 // result, no other SjLj exception interfaces are implemented; please don't
592 // build your own exception handling based on them.
593 // LLVM/Clang supports zero-cost DWARF exception handling.
596
597 // We want to legalize GlobalAddress and ConstantPool nodes into the
598 // appropriate instructions to materialize the address.
609
610 // TRAP is legal.
611 setOperationAction(ISD::TRAP, MVT::Other, Legal);
612
613 // TRAMPOLINE is custom lowered.
616
617 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
619
620 if (Subtarget.is64BitELFABI()) {
621 // VAARG always uses double-word chunks, so promote anything smaller.
623 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
625 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
627 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
629 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
631 } else if (Subtarget.is32BitELFABI()) {
632 // VAARG is custom lowered with the 32-bit SVR4 ABI.
635 } else
637
638 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
639 if (Subtarget.is32BitELFABI())
641 else
643
644 // Use the default implementation.
645 setOperationAction(ISD::VAEND , MVT::Other, Expand);
654
655 // We want to custom lower some of our intrinsics.
661
662 // To handle counter-based loop conditions.
664
669
670 // Comparisons that require checking two conditions.
671 if (Subtarget.hasSPE()) {
676 }
689
692
693 if (Subtarget.has64BitSupport()) {
694 // They also have instructions for converting between i64 and fp.
703 // This is just the low 32 bits of a (signed) fp->i64 conversion.
704 // We cannot do this with Promote because i64 is not a legal type.
707
708 if (Subtarget.hasLFIWAX() || isPPC64) {
711 }
712 } else {
713 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
714 if (Subtarget.hasSPE()) {
717 } else {
720 }
721 }
722
723 // With the instructions enabled under FPCVT, we can do everything.
724 if (Subtarget.hasFPCVT()) {
725 if (Subtarget.has64BitSupport()) {
734 }
735
744 }
745
746 if (Subtarget.use64BitRegs()) {
747 // 64-bit PowerPC implementations can support i64 types directly
748 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
749 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
751 // 64-bit PowerPC wants to expand i128 shifts itself.
755 } else {
756 // 32-bit PowerPC wants to expand i64 shifts itself.
760 }
761
762 // PowerPC has better expansions for funnel shifts than the generic
763 // TargetLowering::expandFunnelShift.
764 if (Subtarget.has64BitSupport()) {
767 }
770
771 if (Subtarget.hasVSX()) {
778 }
779
780 if (Subtarget.hasAltivec()) {
781 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
786 }
787 // First set operation action for all vector types to expand. Then we
788 // will selectively turn on ones that can be effectively codegen'd.
790 // add/sub are legal for all supported vector VT's.
793
794 // For v2i64, these are only valid with P8Vector. This is corrected after
795 // the loop.
796 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
801 }
802 else {
807 }
808
809 if (Subtarget.hasVSX()) {
812 }
813
814 // Vector instructions introduced in P8
815 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
818 }
819 else {
822 }
823
824 // Vector instructions introduced in P9
825 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
827 else
829
830 // We promote all shuffles to v16i8.
832 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
833
834 // We promote all non-typed operations to v4i32.
836 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
838 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
840 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
842 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
844 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
847 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
849 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
850
851 // No other operations are legal.
890
891 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
892 setTruncStoreAction(VT, InnerVT, Expand);
895 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
896 }
897 }
899 if (!Subtarget.hasP8Vector()) {
900 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
901 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
902 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
903 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
904 }
905
906 // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
907 // with merges, splats, etc.
909
910 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
911 // are cheap, so handle them before they get expanded to scalars.
917
918 setOperationAction(ISD::AND , MVT::v4i32, Legal);
919 setOperationAction(ISD::OR , MVT::v4i32, Legal);
920 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
921 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
923 Subtarget.useCRBits() ? Legal : Expand);
924 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
934 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
937
938 // Custom lower ROTL v1i128 to VECTOR_SHUFFLE v16i8.
939 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
940 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
941 if (Subtarget.hasAltivec())
942 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
944 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
945 if (Subtarget.hasP8Altivec())
946 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
947
948 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
949 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
950 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
951 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
952
953 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
954 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
955
956 if (Subtarget.hasVSX()) {
957 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
958 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
960 }
961
962 if (Subtarget.hasP8Altivec())
963 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
964 else
965 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
966
967 if (Subtarget.isISA3_1()) {
968 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
969 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
970 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
971 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
972 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
973 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
974 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
975 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
976 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
977 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
978 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
979 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
980 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
981 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
982 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
983 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
984 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
985 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
986 }
987
988 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
989 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
990
993 // LE is P8+/64-bit so direct moves are supported and these operations
994 // are legal. The custom transformation requires 64-bit since we need a
995 // pair of stores that will cover a 128-bit load for P10.
996 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1000 }
1001
1006
1007 // Altivec does not contain unordered floating-point compare instructions
1008 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1009 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1010 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1011 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1012
1013 if (Subtarget.hasVSX()) {
1016 if (Subtarget.hasP8Vector()) {
1019 }
1020 if (Subtarget.hasDirectMove() && isPPC64) {
1029 }
1031
1032 // The nearbyint variants are not allowed to raise the inexact exception
1033 // so we can only code-gen them with unsafe math.
1034 if (TM.Options.UnsafeFPMath) {
1037 }
1038
1039 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1041 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1044 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1047
1049 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1053
1054 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1055 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1056
1057 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1058 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1059
1060 // Share the Altivec comparison restrictions.
1061 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1062 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1063 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1064 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1065
1066 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1067 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1068
1070
1071 if (Subtarget.hasP8Vector())
1072 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1073
1074 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1075
1076 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1077 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1078 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1079
1080 if (Subtarget.hasP8Altivec()) {
1081 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1082 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1083 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1084
1085 // 128-bit shifts can be accomplished via 3 instructions for SHL and
1086 // SRL, but not for SRA because of the instructions available:
1087 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1088 // doing so here.
1089 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1090 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1091 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1092
1093 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1094 }
1095 else {
1096 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1097 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1098 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1099
1100 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1101
1102 // VSX v2i64 only supports non-arithmetic operations.
1103 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1104 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1105 }
1106
1107 if (Subtarget.isISA3_1())
1108 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1109 else
1110 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1111
1112 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1113 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1115 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1116
1118
1127
1128 // Custom handling for partial vectors of integers converted to
1129 // floating point. We already have optimal handling for v2i32 through
1130 // the DAG combine, so those aren't necessary.
1147
1148 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1149 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1150 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1151 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1154
1157
1158 // Handle constrained floating-point operations on vectors.
1159 // The predicate is `hasVSX` because Altivec instructions do not raise
1160 // exceptions but VSX vector instructions do.
1174
1188
1189 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1190 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1191
1192 for (MVT FPT : MVT::fp_valuetypes())
1193 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1194
1195 // Expand the SELECT to SELECT_CC
1197
1198 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1199 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1200
1201 // No implementation for these ops for PowerPC.
1203 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1204 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1205 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1207 setOperationAction(ISD::FREM, MVT::f128, Expand);
1208 }
1209
1210 if (Subtarget.hasP8Altivec()) {
1211 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1212 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1213 }
1214
1215 if (Subtarget.hasP9Vector()) {
1218
1219 // Test data class instructions store results in CR bits.
1220 if (Subtarget.useCRBits()) {
1225 }
1226
1227 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1228 // SRL, but not for SRA because of the instructions available:
1229 // VS{RL} and VS{RL}O.
1230 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1231 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1232 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1233
1234 setOperationAction(ISD::FADD, MVT::f128, Legal);
1235 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1236 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1237 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1239
1240 setOperationAction(ISD::FMA, MVT::f128, Legal);
1247
1249 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1251 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1254
1258
1259 // Handle constrained floating-point operations on fp128.
1276 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1277 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1278 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1279 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1280 } else if (Subtarget.hasVSX()) {
1283
1284 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1285 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1286
1287 // Set FADD/FSUB as libcall to keep the legalizer from expanding the
1288 // fp_to_uint and int_to_fp.
1291
1292 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1293 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1294 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1295 setOperationAction(ISD::FABS, MVT::f128, Expand);
1297 setOperationAction(ISD::FMA, MVT::f128, Expand);
1299
1300 // Expand the fp_extend if the target type is fp128.
1303
1304 // Expand the fp_round if the source type is fp128.
1305 for (MVT VT : {MVT::f32, MVT::f64}) {
1308 }
1309
1314
1315 // Lower the following f128 select_cc pattern:
1316 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1318
1319 // We need to handle f128 SELECT_CC with integer result type.
1321 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1322 }
1323
1324 if (Subtarget.hasP9Altivec()) {
1325 if (Subtarget.isISA3_1()) {
1330 } else {
1333 }
1341
1342 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1343 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1344 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1345 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1346 }
1347
1348 if (Subtarget.hasP10Vector()) {
1350 }
1351 }
1352
1353 if (Subtarget.pairedVectorMemops()) {
1354 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1355 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1356 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1357 }
1358 if (Subtarget.hasMMA()) {
1359 if (Subtarget.isISAFuture()) {
1360 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1361 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1362 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1363 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1364 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1365 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1366 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1367 } else {
1368 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1369 }
1370 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1371 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1373 }
1374
1375 if (Subtarget.has64BitSupport())
1377
1378 if (Subtarget.isISA3_1())
1379 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1380
1381 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1382
1383 if (!isPPC64) {
1386 }
1387
1392 }
1393
1395
1396 if (Subtarget.hasAltivec()) {
1397 // Altivec instructions set fields to all zeros or all ones.
1399 }
1400
1403 else if (isPPC64)
1405 else
1407
1408 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1409
1410 // We have target-specific dag combine patterns for the following nodes:
1413 if (Subtarget.hasFPCVT())
1416 if (Subtarget.useCRBits())
1420
1422
1424
1425 if (Subtarget.useCRBits()) {
1427 }
1428
1429 // With 32 condition bits, we don't need to sink (and duplicate) compares
1430 // aggressively in CodeGenPrep.
1431 if (Subtarget.useCRBits()) {
1433 }
1434
1435 // TODO: The default entry number is set to 64. This stops most jump table
1436 // generation on PPC. But it is good for current PPC HWs because the indirect
1437 // branch via mtctr to the jump table may lead to poor branch prediction.
1438 // Re-evaluate this value on future HWs that can do better with mtctr.
1440
1442 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1443
1444 auto CPUDirective = Subtarget.getCPUDirective();
1445 switch (CPUDirective) {
1446 default: break;
1447 case PPC::DIR_970:
1448 case PPC::DIR_A2:
1449 case PPC::DIR_E500:
1450 case PPC::DIR_E500mc:
1451 case PPC::DIR_E5500:
1452 case PPC::DIR_PWR4:
1453 case PPC::DIR_PWR5:
1454 case PPC::DIR_PWR5X:
1455 case PPC::DIR_PWR6:
1456 case PPC::DIR_PWR6X:
1457 case PPC::DIR_PWR7:
1458 case PPC::DIR_PWR8:
1459 case PPC::DIR_PWR9:
1460 case PPC::DIR_PWR10:
1461 case PPC::DIR_PWR11:
1465 break;
1466 }
1467
1468 if (Subtarget.enableMachineScheduler())
1470 else
1472
1474
1475 // The Freescale cores do better with aggressive inlining of memcpy and
1476 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1477 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1478 MaxStoresPerMemset = 32;
1480 MaxStoresPerMemcpy = 32;
1484 } else if (CPUDirective == PPC::DIR_A2) {
1485 // The A2 also benefits from (very) aggressive inlining of memcpy and
1486 // friends. The overhead of the function call, even when warm, can be
1487 // over one hundred cycles.
1488 MaxStoresPerMemset = 128;
1489 MaxStoresPerMemcpy = 128;
1490 MaxStoresPerMemmove = 128;
1491 MaxLoadsPerMemcmp = 128;
1492 } else {
1495 }
1496
1497 // Enable generation of STXVP instructions by default for mcpu=future.
1498 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1500 DisableAutoPairedVecSt = false;
1501
1502 IsStrictFPEnabled = true;
1503
1504 // Let the subtarget (CPU) decide if a predictable select is more expensive
1505 // than the corresponding branch. This information is used in CGP to decide
1506 // when to convert selects into branches.
1508
1510}
1511
1512// *********************************** NOTE ************************************
1513// For selecting load and store instructions, the addressing modes are defined
1514// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1515// patterns to match the load and store instructions.
1516//
1517// The TD definitions for the addressing modes correspond to their respective
1518// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1519// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1520// address mode flags of a particular node. Afterwards, the computed address
1521// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1522// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1523// accordingly, based on the preferred addressing mode.
1524//
1525// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1526// MemOpFlags contains all the possible flags that can be used to compute the
1527// optimal addressing mode for load and store instructions.
1528// AddrMode contains all the possible load and store addressing modes available
1529// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1530//
1531// When adding new load and store instructions, it is possible that new address
1532// flags may need to be added into MemOpFlags, and a new addressing mode will
1533// need to be added to AddrMode. An entry for the new addressing mode (consisting
1534// of the minimal and main distinguishing address flags for the new load/store
1535// instructions) will need to be added into initializeAddrModeMap() below.
1536// Finally, when adding new addressing modes, getAddrModeForFlags() will
1537// need to be updated to account for selecting the optimal addressing mode.
1538// *****************************************************************************
1539/// Initialize the map that relates the different addressing modes of the load
1540/// and store instructions to a set of flags. This ensures the load/store
1541/// instruction is correctly matched during instruction selection.
1542void PPCTargetLowering::initializeAddrModeMap() {
1543 AddrModesMap[PPC::AM_DForm] = {
1544 // LWZ, STW
1549 // LBZ, LHZ, STB, STH
1554 // LHA
1559 // LFS, LFD, STFS, STFD
1564 };
1565 AddrModesMap[PPC::AM_DSForm] = {
1566 // LWA
1570 // LD, STD
1574 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1578 };
1579 AddrModesMap[PPC::AM_DQForm] = {
1580 // LXV, STXV
1584 };
1585 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1587 // TODO: Add mapping for quadword load/store.
1588}
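// Illustrative example (a sketch based on the entries above, not an exhaustive
// description): a plain 32-bit zero-extending load whose address is a register
// plus a signed 16-bit displacement computes flags that match the first
// AM_DForm entry, so getAddrModeForFlags() selects PPC::AM_DForm and the
// LWZ pattern is used. The same kind of access with a 64-bit width and a
// displacement that is a multiple of 4 instead matches AM_DSForm (LD/STD).
// The exact flag names are those of the MemOpFlags enum in PPCISelLowering.h.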
1589
1590/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1591/// the desired ByVal argument alignment.
1592static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1593 if (MaxAlign == MaxMaxAlign)
1594 return;
1595 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1596 if (MaxMaxAlign >= 32 &&
1597 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1598 MaxAlign = Align(32);
1599 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1600 MaxAlign < 16)
1601 MaxAlign = Align(16);
1602 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1603 Align EltAlign;
1604 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1605 if (EltAlign > MaxAlign)
1606 MaxAlign = EltAlign;
1607 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1608 for (auto *EltTy : STy->elements()) {
1609 Align EltAlign;
1610 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1611 if (EltAlign > MaxAlign)
1612 MaxAlign = EltAlign;
1613 if (MaxAlign == MaxMaxAlign)
1614 break;
1615 }
1616 }
1617}
1618
1619/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1620/// function arguments in the caller parameter area.
1621Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1622 const DataLayout &DL) const {
1623 // 16-byte and wider vectors are passed on a 16-byte boundary.
1624 // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1625 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1626 if (Subtarget.hasAltivec())
1627 getMaxByValAlign(Ty, Alignment, Align(16));
1628 return Alignment;
1629}
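// Worked example (a sketch of the logic above): on a PPC64 subtarget with
// Altivec, a by-value struct containing a <4 x i32> member is given ByVal
// alignment 16, because getMaxByValAlign raises the alignment for the 128-bit
// vector element, while a struct of plain i32 fields keeps the default of
// 8 bytes on PPC64 or 4 bytes on PPC32.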
1630
1631bool PPCTargetLowering::useSoftFloat() const {
1632 return Subtarget.useSoftFloat();
1633}
1634
1635bool PPCTargetLowering::hasSPE() const {
1636 return Subtarget.hasSPE();
1637}
1638
1640 return VT.isScalarInteger();
1641}
1642
1644 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1645 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1646 return false;
1647
1648 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1649 if (VTy->getScalarType()->isIntegerTy()) {
1650 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1651 if (ElemSizeInBits == 32) {
1652 Index = Subtarget.isLittleEndian() ? 2 : 1;
1653 return true;
1654 }
1655 if (ElemSizeInBits == 64) {
1656 Index = Subtarget.isLittleEndian() ? 1 : 0;
1657 return true;
1658 }
1659 }
1660 }
1661 return false;
1662}
1663
1664const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1665 switch ((PPCISD::NodeType)Opcode) {
1666 case PPCISD::FIRST_NUMBER: break;
1667 case PPCISD::FSEL: return "PPCISD::FSEL";
1668 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1669 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1670 case PPCISD::FCFID: return "PPCISD::FCFID";
1671 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1672 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1673 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1674 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1675 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1676 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1677 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1678 case PPCISD::FRE: return "PPCISD::FRE";
1679 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1680 case PPCISD::FTSQRT:
1681 return "PPCISD::FTSQRT";
1682 case PPCISD::FSQRT:
1683 return "PPCISD::FSQRT";
1684 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1685 case PPCISD::VPERM: return "PPCISD::VPERM";
1686 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1688 return "PPCISD::XXSPLTI_SP_TO_DP";
1690 return "PPCISD::XXSPLTI32DX";
1691 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1692 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1693 case PPCISD::XXPERM:
1694 return "PPCISD::XXPERM";
1695 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1696 case PPCISD::CMPB: return "PPCISD::CMPB";
1697 case PPCISD::Hi: return "PPCISD::Hi";
1698 case PPCISD::Lo: return "PPCISD::Lo";
1699 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1700 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1701 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1702 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1703 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1704 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1705 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1706 case PPCISD::SRL: return "PPCISD::SRL";
1707 case PPCISD::SRA: return "PPCISD::SRA";
1708 case PPCISD::SHL: return "PPCISD::SHL";
1709 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1710 case PPCISD::CALL: return "PPCISD::CALL";
1711 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1712 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1713 case PPCISD::CALL_RM:
1714 return "PPCISD::CALL_RM";
1716 return "PPCISD::CALL_NOP_RM";
1718 return "PPCISD::CALL_NOTOC_RM";
1719 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1720 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1721 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1722 case PPCISD::BCTRL_RM:
1723 return "PPCISD::BCTRL_RM";
1725 return "PPCISD::BCTRL_LOAD_TOC_RM";
1726 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1727 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1728 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1729 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1730 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1731 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1732 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1733 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1734 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1735 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1737 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1739 return "PPCISD::ANDI_rec_1_EQ_BIT";
1741 return "PPCISD::ANDI_rec_1_GT_BIT";
1742 case PPCISD::VCMP: return "PPCISD::VCMP";
1743 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1744 case PPCISD::LBRX: return "PPCISD::LBRX";
1745 case PPCISD::STBRX: return "PPCISD::STBRX";
1746 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1747 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1748 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1749 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1750 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1751 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1752 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1753 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1754 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1756 return "PPCISD::ST_VSR_SCAL_INT";
1757 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1758 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1759 case PPCISD::BDZ: return "PPCISD::BDZ";
1760 case PPCISD::MFFS: return "PPCISD::MFFS";
1761 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1762 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1763 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1764 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1765 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1766 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1767 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1768 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1769 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1770 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1771 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1772 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1773 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1774 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1775 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1776 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1777 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1778 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1779 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1780 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1781 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1782 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1783 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1785 return "PPCISD::PADDI_DTPREL";
1786 case PPCISD::VADD_SPLAT:
1787 return "PPCISD::VADD_SPLAT";
1788 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1789 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1790 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1791 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1792 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1793 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1794 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1795 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1796 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1798 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1800 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1801 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1802 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1803 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1804 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1805 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1806 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1807 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1808 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1810 return "PPCISD::STRICT_FADDRTZ";
1812 return "PPCISD::STRICT_FCTIDZ";
1814 return "PPCISD::STRICT_FCTIWZ";
1816 return "PPCISD::STRICT_FCTIDUZ";
1818 return "PPCISD::STRICT_FCTIWUZ";
1820 return "PPCISD::STRICT_FCFID";
1822 return "PPCISD::STRICT_FCFIDU";
1824 return "PPCISD::STRICT_FCFIDS";
1826 return "PPCISD::STRICT_FCFIDUS";
1827 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1828 case PPCISD::STORE_COND:
1829 return "PPCISD::STORE_COND";
1830 case PPCISD::SETBC:
1831 return "PPCISD::SETBC";
1832 case PPCISD::SETBCR:
1833 return "PPCISD::SETBCR";
1834 case PPCISD::ADDC:
1835 return "PPCISD::ADDC";
1836 case PPCISD::ADDE:
1837 return "PPCISD::ADDE";
1838 case PPCISD::SUBC:
1839 return "PPCISD::SUBC";
1840 case PPCISD::SUBE:
1841 return "PPCISD::SUBE";
1842 }
1843 return nullptr;
1844}
1845
1846EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1847 EVT VT) const {
1848 if (!VT.isVector())
1849 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1850
1851 return VT.changeVectorElementTypeToInteger();
1852}
1853bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1855 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1856 return true;
1857}
1858
1859//===----------------------------------------------------------------------===//
1860// Node matching predicates, for use by the tblgen matching code.
1861//===----------------------------------------------------------------------===//
1862
1863/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1865 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1866 return CFP->getValueAPF().isZero();
1867 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1868 // Maybe this has already been legalized into the constant pool?
1869 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1870 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1871 return CFP->getValueAPF().isZero();
1872 }
1873 return false;
1874}
1875
1876/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1877/// true if Op is undef or if it matches the specified value.
1878static bool isConstantOrUndef(int Op, int Val) {
1879 return Op < 0 || Op == Val;
1880}
1881
1882/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1883/// VPKUHUM instruction.
1884/// The ShuffleKind distinguishes between big-endian operations with
1885/// two different inputs (0), either-endian operations with two identical
1886/// inputs (1), and little-endian operations with two different inputs (2).
1887/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1889 SelectionDAG &DAG) {
1890 bool IsLE = DAG.getDataLayout().isLittleEndian();
1891 if (ShuffleKind == 0) {
1892 if (IsLE)
1893 return false;
1894 for (unsigned i = 0; i != 16; ++i)
1895 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1896 return false;
1897 } else if (ShuffleKind == 2) {
1898 if (!IsLE)
1899 return false;
1900 for (unsigned i = 0; i != 16; ++i)
1901 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1902 return false;
1903 } else if (ShuffleKind == 1) {
1904 unsigned j = IsLE ? 0 : 1;
1905 for (unsigned i = 0; i != 8; ++i)
1906 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1907 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1908 return false;
1909 }
1910 return true;
1911}
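// Example (derived from the checks above, shown for illustration only): for a
// big-endian target with two distinct inputs (ShuffleKind == 0), VPKUHUM keeps
// the odd (low-order) byte of every halfword, so the expected v16i8 mask is
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// i.e. mask element i must be i*2+1 for i = 0..15.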
1912
1913/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1914/// VPKUWUM instruction.
1915/// The ShuffleKind distinguishes between big-endian operations with
1916/// two different inputs (0), either-endian operations with two identical
1917/// inputs (1), and little-endian operations with two different inputs (2).
1918/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1920 SelectionDAG &DAG) {
1921 bool IsLE = DAG.getDataLayout().isLittleEndian();
1922 if (ShuffleKind == 0) {
1923 if (IsLE)
1924 return false;
1925 for (unsigned i = 0; i != 16; i += 2)
1926 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1927 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1928 return false;
1929 } else if (ShuffleKind == 2) {
1930 if (!IsLE)
1931 return false;
1932 for (unsigned i = 0; i != 16; i += 2)
1933 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1934 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1935 return false;
1936 } else if (ShuffleKind == 1) {
1937 unsigned j = IsLE ? 0 : 2;
1938 for (unsigned i = 0; i != 8; i += 2)
1939 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1940 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1941 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1942 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1943 return false;
1944 }
1945 return true;
1946}
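// Example (illustrative only): for ShuffleKind == 0 on a big-endian target,
// VPKUWUM keeps the low halfword of every word, so the mask must be
//   <2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31>
// matching the (i*2+2, i*2+3) byte-pair check above.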
1947
1948/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1949/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1950/// current subtarget.
1951///
1952/// The ShuffleKind distinguishes between big-endian operations with
1953/// two different inputs (0), either-endian operations with two identical
1954/// inputs (1), and little-endian operations with two different inputs (2).
1955/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1957 SelectionDAG &DAG) {
1958 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1959 if (!Subtarget.hasP8Vector())
1960 return false;
1961
1962 bool IsLE = DAG.getDataLayout().isLittleEndian();
1963 if (ShuffleKind == 0) {
1964 if (IsLE)
1965 return false;
1966 for (unsigned i = 0; i != 16; i += 4)
1967 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1968 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1969 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1970 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1971 return false;
1972 } else if (ShuffleKind == 2) {
1973 if (!IsLE)
1974 return false;
1975 for (unsigned i = 0; i != 16; i += 4)
1976 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1977 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1978 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1979 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1980 return false;
1981 } else if (ShuffleKind == 1) {
1982 unsigned j = IsLE ? 0 : 4;
1983 for (unsigned i = 0; i != 8; i += 4)
1984 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1985 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1986 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1987 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1988 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1989 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1990 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1991 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1992 return false;
1993 }
1994 return true;
1995}
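// Example (illustrative only): for ShuffleKind == 0 on a big-endian P8Vector
// target, VPKUDUM keeps the low word of every doubleword, so the mask must be
//   <4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31>
// matching the (i*2+4 .. i*2+7) four-byte check above.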
1996
1997/// isVMerge - Common function, used to match vmrg* shuffles.
1998///
1999static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2000 unsigned LHSStart, unsigned RHSStart) {
2001 if (N->getValueType(0) != MVT::v16i8)
2002 return false;
2003 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2004 "Unsupported merge size!");
2005
2006 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2007 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2008 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2009 LHSStart+j+i*UnitSize) ||
2010 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2011 RHSStart+j+i*UnitSize))
2012 return false;
2013 }
2014 return true;
2015}
2016
2017/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2018/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2019/// The ShuffleKind distinguishes between big-endian merges with two
2020/// different inputs (0), either-endian merges with two identical inputs (1),
2021/// and little-endian merges with two different inputs (2). For the latter,
2022/// the input operands are swapped (see PPCInstrAltivec.td).
2024 unsigned ShuffleKind, SelectionDAG &DAG) {
2025 if (DAG.getDataLayout().isLittleEndian()) {
2026 if (ShuffleKind == 1) // unary
2027 return isVMerge(N, UnitSize, 0, 0);
2028 else if (ShuffleKind == 2) // swapped
2029 return isVMerge(N, UnitSize, 0, 16);
2030 else
2031 return false;
2032 } else {
2033 if (ShuffleKind == 1) // unary
2034 return isVMerge(N, UnitSize, 8, 8);
2035 else if (ShuffleKind == 0) // normal
2036 return isVMerge(N, UnitSize, 8, 24);
2037 else
2038 return false;
2039 }
2040}
2041
2042/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2043/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2044/// The ShuffleKind distinguishes between big-endian merges with two
2045/// different inputs (0), either-endian merges with two identical inputs (1),
2046/// and little-endian merges with two different inputs (2). For the latter,
2047/// the input operands are swapped (see PPCInstrAltivec.td).
2049 unsigned ShuffleKind, SelectionDAG &DAG) {
2050 if (DAG.getDataLayout().isLittleEndian()) {
2051 if (ShuffleKind == 1) // unary
2052 return isVMerge(N, UnitSize, 8, 8);
2053 else if (ShuffleKind == 2) // swapped
2054 return isVMerge(N, UnitSize, 8, 24);
2055 else
2056 return false;
2057 } else {
2058 if (ShuffleKind == 1) // unary
2059 return isVMerge(N, UnitSize, 0, 0);
2060 else if (ShuffleKind == 0) // normal
2061 return isVMerge(N, UnitSize, 0, 16);
2062 else
2063 return false;
2064 }
2065}
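// Example (illustrative only): a big-endian vmrglb merge of two distinct
// inputs (ShuffleKind == 0, UnitSize == 1) interleaves bytes 8..15 of each
// input, so isVMRGLShuffleMask expects the mask
//   <8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>
// while the corresponding vmrghb mask starts at bytes 0 and 16 instead.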
2066
2067/**
2068 * Common function used to match vmrgew and vmrgow shuffles
2069 *
2070 * The indexOffset determines whether to look for even or odd words in
2071 * the shuffle mask. This is based on the endianness of the target
2072 * machine.
2073 * - Little Endian:
2074 * - Use offset of 0 to check for odd elements
2075 * - Use offset of 4 to check for even elements
2076 * - Big Endian:
2077 * - Use offset of 0 to check for even elements
2078 * - Use offset of 4 to check for odd elements
2079 * A detailed description of the vector element ordering for little endian and
2080 * big endian can be found at
2081 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2082 * Targeting your applications - what little endian and big endian IBM XL C/C++
2083 * compiler differences mean to you
2084 *
2085 * The mask to the shuffle vector instruction specifies the indices of the
2086 * elements from the two input vectors to place in the result. The elements are
2087 * numbered in array-access order, starting with the first vector. These vectors
2088 * are always of type v16i8, thus each vector will contain 16 elements, each
2089 * 8 bits in size. More info on the shuffle vector can be found in the
2090 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2091 * Language Reference.
2092 *
2093 * The RHSStartValue indicates whether the same input vectors are used (unary)
2094 * or two different input vectors are used, based on the following:
2095 * - If the instruction uses the same vector for both inputs, the range of the
2096 * indices will be 0 to 15. In this case, the RHSStart value passed should
2097 * be 0.
2098 * - If the instruction has two different vectors then the range of the
2099 * indices will be 0 to 31. In this case, the RHSStart value passed should
2100 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2101 * to 31 specify elements in the second vector).
2102 *
2103 * \param[in] N The shuffle vector SD Node to analyze
2104 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2105 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2106 * vector to the shuffle_vector instruction
2107 * \return true iff this shuffle vector represents an even or odd word merge
2108 */
2109static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2110 unsigned RHSStartValue) {
2111 if (N->getValueType(0) != MVT::v16i8)
2112 return false;
2113
2114 for (unsigned i = 0; i < 2; ++i)
2115 for (unsigned j = 0; j < 4; ++j)
2116 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2117 i*RHSStartValue+j+IndexOffset) ||
2118 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2119 i*RHSStartValue+j+IndexOffset+8))
2120 return false;
2121 return true;
2122}
2123
2124/**
2125 * Determine if the specified shuffle mask is suitable for the vmrgew or
2126 * vmrgow instructions.
2127 *
2128 * \param[in] N The shuffle vector SD Node to analyze
2129 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2130 * \param[in] ShuffleKind Identify the type of merge:
2131 * - 0 = big-endian merge with two different inputs;
2132 * - 1 = either-endian merge with two identical inputs;
2133 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2134 * little-endian merges).
2135 * \param[in] DAG The current SelectionDAG
2136 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
2137 */
2139 unsigned ShuffleKind, SelectionDAG &DAG) {
2140 if (DAG.getDataLayout().isLittleEndian()) {
2141 unsigned indexOffset = CheckEven ? 4 : 0;
2142 if (ShuffleKind == 1) // Unary
2143 return isVMerge(N, indexOffset, 0);
2144 else if (ShuffleKind == 2) // swapped
2145 return isVMerge(N, indexOffset, 16);
2146 else
2147 return false;
2148 }
2149 else {
2150 unsigned indexOffset = CheckEven ? 0 : 4;
2151 if (ShuffleKind == 1) // Unary
2152 return isVMerge(N, indexOffset, 0);
2153 else if (ShuffleKind == 0) // Normal
2154 return isVMerge(N, indexOffset, 16);
2155 else
2156 return false;
2157 }
2158 return false;
2159}
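// Example (illustrative only): an even-word merge (vmrgew) of two distinct
// inputs on a big-endian target (CheckEven == true, ShuffleKind == 0) uses
// indexOffset 0 and RHSStartValue 16, so the expected mask is
//   <0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27>
// i.e. word 0 and word 2 of each input, interleaved.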
2160
2161/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2162/// amount, otherwise return -1.
2163/// The ShuffleKind distinguishes between big-endian operations with two
2164/// different inputs (0), either-endian operations with two identical inputs
2165/// (1), and little-endian operations with two different inputs (2). For the
2166/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2167int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2168 SelectionDAG &DAG) {
2169 if (N->getValueType(0) != MVT::v16i8)
2170 return -1;
2171
2172 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2173
2174 // Find the first non-undef value in the shuffle mask.
2175 unsigned i;
2176 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2177 /*search*/;
2178
2179 if (i == 16) return -1; // all undef.
2180
2181 // Otherwise, check to see if the rest of the elements are consecutively
2182 // numbered from this value.
2183 unsigned ShiftAmt = SVOp->getMaskElt(i);
2184 if (ShiftAmt < i) return -1;
2185
2186 ShiftAmt -= i;
2187 bool isLE = DAG.getDataLayout().isLittleEndian();
2188
2189 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2190 // Check the rest of the elements to see if they are consecutive.
2191 for (++i; i != 16; ++i)
2192 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2193 return -1;
2194 } else if (ShuffleKind == 1) {
2195 // Check the rest of the elements to see if they are consecutive.
2196 for (++i; i != 16; ++i)
2197 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2198 return -1;
2199 } else
2200 return -1;
2201
2202 if (isLE)
2203 ShiftAmt = 16 - ShiftAmt;
2204
2205 return ShiftAmt;
2206}
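// Illustrative example (not from the upstream source): the big-endian,
// two-input mask {3,4,5,...,18} is consecutive starting at 3, so
// isVSLDOIShuffleMask returns a shift amount of 3; with swapped inputs
// (ShuffleKind 2) on a little-endian subtarget the same consecutive run is
// reported as 16 - 3 = 13.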
2207
2208/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2209/// specifies a splat of a single element that is suitable for input to
2210/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2211bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2212  EVT VT = N->getValueType(0);
2213 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2214 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2215
2216 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2217 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2218
2219 // The consecutive indices need to specify an element, not part of two
2220 // different elements. So abandon ship early if this isn't the case.
2221 if (N->getMaskElt(0) % EltSize != 0)
2222 return false;
2223
2224 // This is a splat operation if each element of the permute is the same, and
2225 // if the value doesn't reference the second vector.
2226 unsigned ElementBase = N->getMaskElt(0);
2227
2228 // FIXME: Handle UNDEF elements too!
2229 if (ElementBase >= 16)
2230 return false;
2231
2232 // Check that the indices are consecutive, in the case of a multi-byte element
2233 // splatted with a v16i8 mask.
2234 for (unsigned i = 1; i != EltSize; ++i)
2235 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2236 return false;
2237
2238 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2239 // An UNDEF element is a sequence of UNDEF bytes.
2240 if (N->getMaskElt(i) < 0) {
2241 for (unsigned j = 1; j != EltSize; ++j)
2242 if (N->getMaskElt(i + j) >= 0)
2243 return false;
2244 } else
2245 for (unsigned j = 0; j != EltSize; ++j)
2246 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2247 return false;
2248 }
2249 return true;
2250}
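// Illustrative example (not from the upstream source): with EltSize == 4, the
// mask {8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11} splats word 2 of the
// first input (ElementBase == 8), so isSplatShuffleMask returns true; a mask
// whose first index were 9 would be rejected because 9 is not a multiple of
// the element size.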
2251
2252/// Check that the mask is shuffling N byte elements. Within each N byte
2253/// element of the mask, the indices could be either in increasing or
2254/// decreasing order as long as they are consecutive.
2255/// \param[in] N the shuffle vector SD Node to analyze
2256/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2257/// Word/DoubleWord/QuadWord).
2258/// \param[in] StepLen the index delta between adjacent bytes within each
2259///            element: 1 if the mask is in increasing order, -1 if decreasing.
2260/// \return true iff the mask is shuffling N byte elements.
2261static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2262 int StepLen) {
2263 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2264 "Unexpected element width.");
2265  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2266
2267 unsigned NumOfElem = 16 / Width;
2268 unsigned MaskVal[16]; // Width is never greater than 16
2269 for (unsigned i = 0; i < NumOfElem; ++i) {
2270 MaskVal[0] = N->getMaskElt(i * Width);
2271 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2272 return false;
2273 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2274 return false;
2275 }
2276
2277 for (unsigned int j = 1; j < Width; ++j) {
2278 MaskVal[j] = N->getMaskElt(i * Width + j);
2279 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2280 return false;
2281 }
2282 }
2283 }
2284
2285 return true;
2286}
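// Illustrative examples (not from the upstream source): with Width == 4 and
// StepLen == 1, the mask {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11} shuffles
// whole words and is accepted; with Width == 4 and StepLen == -1, the mask
// {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12} (bytes reversed within each
// word) is accepted as well.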
2287
2288bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2289 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2290 if (!isNByteElemShuffleMask(N, 4, 1))
2291 return false;
2292
2293 // Now we look at mask elements 0,4,8,12
2294 unsigned M0 = N->getMaskElt(0) / 4;
2295 unsigned M1 = N->getMaskElt(4) / 4;
2296 unsigned M2 = N->getMaskElt(8) / 4;
2297 unsigned M3 = N->getMaskElt(12) / 4;
2298 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2299 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2300
2301 // Below, let H and L be arbitrary elements of the shuffle mask
2302 // where H is in the range [4,7] and L is in the range [0,3].
2303 // H, 1, 2, 3 or L, 5, 6, 7
2304 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2305 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2306 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2307 InsertAtByte = IsLE ? 12 : 0;
2308 Swap = M0 < 4;
2309 return true;
2310 }
2311 // 0, H, 2, 3 or 4, L, 6, 7
2312 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2313 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2314 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2315 InsertAtByte = IsLE ? 8 : 4;
2316 Swap = M1 < 4;
2317 return true;
2318 }
2319 // 0, 1, H, 3 or 4, 5, L, 7
2320 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2321 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2322 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2323 InsertAtByte = IsLE ? 4 : 8;
2324 Swap = M2 < 4;
2325 return true;
2326 }
2327 // 0, 1, 2, H or 4, 5, 6, L
2328 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2329 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2330 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2331 InsertAtByte = IsLE ? 0 : 12;
2332 Swap = M3 < 4;
2333 return true;
2334 }
2335
2336 // If both vector operands for the shuffle are the same vector, the mask will
2337 // contain only elements from the first one and the second one will be undef.
2338 if (N->getOperand(1).isUndef()) {
2339 ShiftElts = 0;
2340 Swap = true;
2341 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2342 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2343 InsertAtByte = IsLE ? 12 : 0;
2344 return true;
2345 }
2346 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2347 InsertAtByte = IsLE ? 8 : 4;
2348 return true;
2349 }
2350 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2351 InsertAtByte = IsLE ? 4 : 8;
2352 return true;
2353 }
2354 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2355 InsertAtByte = IsLE ? 0 : 12;
2356 return true;
2357 }
2358 }
2359
2360 return false;
2361}
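// Illustrative example (not from the upstream source): on a big-endian
// subtarget the word-level mask {5,1,2,3} (only word 0 comes from the second
// input) yields ShiftElts = BigEndianShifts[1] = 0, InsertAtByte = 0 and
// Swap = false: word 1 of the second source is inserted at byte offset 0 of
// the first source.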
2362
2363bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2364                               bool &Swap, bool IsLE) {
2365 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2366 // Ensure each byte index of the word is consecutive.
2367 if (!isNByteElemShuffleMask(N, 4, 1))
2368 return false;
2369
2370 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2371 unsigned M0 = N->getMaskElt(0) / 4;
2372 unsigned M1 = N->getMaskElt(4) / 4;
2373 unsigned M2 = N->getMaskElt(8) / 4;
2374 unsigned M3 = N->getMaskElt(12) / 4;
2375
2376 // If both vector operands for the shuffle are the same vector, the mask will
2377 // contain only elements from the first one and the second one will be undef.
2378 if (N->getOperand(1).isUndef()) {
2379 assert(M0 < 4 && "Indexing into an undef vector?");
2380 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2381 return false;
2382
2383 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2384 Swap = false;
2385 return true;
2386 }
2387
2388 // Ensure each word index of the ShuffleVector Mask is consecutive.
2389 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2390 return false;
2391
2392 if (IsLE) {
2393 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2394 // Input vectors don't need to be swapped if the leading element
2395 // of the result is one of the 3 left elements of the second vector
2396 // (or if there is no shift to be done at all).
2397 Swap = false;
2398 ShiftElts = (8 - M0) % 8;
2399 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2400 // Input vectors need to be swapped if the leading element
2401 // of the result is one of the 3 left elements of the first vector
2402 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2403 Swap = true;
2404 ShiftElts = (4 - M0) % 4;
2405 }
2406
2407 return true;
2408 } else { // BE
2409 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2410 // Input vectors don't need to be swapped if the leading element
2411 // of the result is one of the 4 elements of the first vector.
2412 Swap = false;
2413 ShiftElts = M0;
2414 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2415 // Input vectors need to be swapped if the leading element
2416 // of the result is one of the 4 elements of the right vector.
2417 Swap = true;
2418 ShiftElts = M0 - 4;
2419 }
2420
2421 return true;
2422 }
2423}
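// Illustrative example (not from the upstream source): the big-endian
// word-level mask {2,3,4,5} is consecutive modulo 8 and starts in the first
// input, so Swap = false and ShiftElts = 2 (an XXSLDWI by two words); the
// mask {6,7,0,1} starts in the second input and instead sets Swap = true
// with ShiftElts = 2.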
2424
2425static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, unsigned Width) {
2426  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2427
2428 if (!isNByteElemShuffleMask(N, Width, -1))
2429 return false;
2430
2431 for (int i = 0; i < 16; i += Width)
2432 if (N->getMaskElt(i) != i + Width - 1)
2433 return false;
2434
2435 return true;
2436}
2437
2438bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2439  return isXXBRShuffleMaskHelper(N, 2);
2440}
2441
2442bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2443  return isXXBRShuffleMaskHelper(N, 4);
2444}
2445
2446bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2447  return isXXBRShuffleMaskHelper(N, 8);
2448}
2449
2450bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2451  return isXXBRShuffleMaskHelper(N, 16);
2452}
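// Illustrative example (not from the upstream source): for the word form
// (Width == 4, i.e. XXBRW), the mask
//   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}
// reverses the bytes of each word: every group is decreasing (StepLen -1)
// and the mask element at each word boundary i equals i + Width - 1.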
2453
2454/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2455/// if the inputs to the instruction should be swapped and set \p DM to the
2456/// value for the immediate.
2457/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2458/// AND element 0 of the result comes from the first input (LE) or second input
2459/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2460/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2461/// mask.
2462bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2463                                bool &Swap, bool IsLE) {
2464 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2465
2466 // Ensure each byte index of the double word is consecutive.
2467 if (!isNByteElemShuffleMask(N, 8, 1))
2468 return false;
2469
2470 unsigned M0 = N->getMaskElt(0) / 8;
2471 unsigned M1 = N->getMaskElt(8) / 8;
2472 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2473
2474 // If both vector operands for the shuffle are the same vector, the mask will
2475 // contain only elements from the first one and the second one will be undef.
2476 if (N->getOperand(1).isUndef()) {
2477 if ((M0 | M1) < 2) {
2478 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2479 Swap = false;
2480 return true;
2481 } else
2482 return false;
2483 }
2484
2485 if (IsLE) {
2486 if (M0 > 1 && M1 < 2) {
2487 Swap = false;
2488 } else if (M0 < 2 && M1 > 1) {
2489 M0 = (M0 + 2) % 4;
2490 M1 = (M1 + 2) % 4;
2491 Swap = true;
2492 } else
2493 return false;
2494
2495 // Note: if control flow comes here that means Swap is already set above
2496 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2497 return true;
2498 } else { // BE
2499 if (M0 < 2 && M1 > 1) {
2500 Swap = false;
2501 } else if (M0 > 1 && M1 < 2) {
2502 M0 = (M0 + 2) % 4;
2503 M1 = (M1 + 2) % 4;
2504 Swap = true;
2505 } else
2506 return false;
2507
2508 // Note: if control flow comes here that means Swap is already set above
2509 DM = (M0 << 1) + (M1 & 1);
2510 return true;
2511 }
2512}
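// Illustrative example (not from the upstream source): on a big-endian
// subtarget, the byte mask {0..7, 24..31} gives doubleword indices M0 = 0
// and M1 = 3, which fall in the "no swap" case, so Swap = false and
// DM = (M0 << 1) + (M1 & 1) = 1.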
2513
2514
2515/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2516/// appropriate for PPC mnemonics (which have a big endian bias - namely
2517/// elements are counted from the left of the vector register).
2518unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2519 SelectionDAG &DAG) {
2520 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2521 assert(isSplatShuffleMask(SVOp, EltSize));
2522 EVT VT = SVOp->getValueType(0);
2523
2524 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2525 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2526 : SVOp->getMaskElt(0);
2527
2528 if (DAG.getDataLayout().isLittleEndian())
2529 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2530 else
2531 return SVOp->getMaskElt(0) / EltSize;
2532}
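// Illustrative example (not from the upstream source): for a v4i32 splat of
// word 2 (mask element 0 is byte 8, EltSize == 4), the PPC mnemonic index is
// 8 / 4 = 2 on big-endian and (16 / 4) - 1 - 2 = 1 on little-endian,
// reflecting the left-to-right element numbering used by the splat
// instructions.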
2533
2534/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2535/// by using a vspltis[bhw] instruction of the specified element size, return
2536/// the constant being splatted. The ByteSize field indicates the number of
2537/// bytes of each element [124] -> [bhw].
2538SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2539  SDValue OpVal;
2540
2541 // If ByteSize of the splat is bigger than the element size of the
2542 // build_vector, then we have a case where we are checking for a splat where
2543 // multiple elements of the buildvector are folded together into a single
2544  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2545 unsigned EltSize = 16/N->getNumOperands();
2546 if (EltSize < ByteSize) {
2547 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2548 SDValue UniquedVals[4];
2549 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2550
2551 // See if all of the elements in the buildvector agree across.
2552 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2553 if (N->getOperand(i).isUndef()) continue;
2554 // If the element isn't a constant, bail fully out.
2555 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2556
2557 if (!UniquedVals[i&(Multiple-1)].getNode())
2558 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2559 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2560 return SDValue(); // no match.
2561 }
2562
2563 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2564 // either constant or undef values that are identical for each chunk. See
2565 // if these chunks can form into a larger vspltis*.
2566
2567 // Check to see if all of the leading entries are either 0 or -1. If
2568 // neither, then this won't fit into the immediate field.
2569 bool LeadingZero = true;
2570 bool LeadingOnes = true;
2571 for (unsigned i = 0; i != Multiple-1; ++i) {
2572 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2573
2574 LeadingZero &= isNullConstant(UniquedVals[i]);
2575 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2576 }
2577 // Finally, check the least significant entry.
2578 if (LeadingZero) {
2579 if (!UniquedVals[Multiple-1].getNode())
2580 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2581 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2582 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2583 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2584 }
2585 if (LeadingOnes) {
2586 if (!UniquedVals[Multiple-1].getNode())
2587 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2588 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2589 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2590 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2591 }
2592
2593 return SDValue();
2594 }
2595
2596 // Check to see if this buildvec has a single non-undef value in its elements.
2597 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2598 if (N->getOperand(i).isUndef()) continue;
2599 if (!OpVal.getNode())
2600 OpVal = N->getOperand(i);
2601 else if (OpVal != N->getOperand(i))
2602 return SDValue();
2603 }
2604
2605 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2606
2607 unsigned ValSizeInBytes = EltSize;
2608 uint64_t Value = 0;
2609 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2610 Value = CN->getZExtValue();
2611 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2612 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2613 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2614 }
2615
2616 // If the splat value is larger than the element value, then we can never do
2617 // this splat. The only case that we could fit the replicated bits into our
2618 // immediate field for would be zero, and we prefer to use vxor for it.
2619 if (ValSizeInBytes < ByteSize) return SDValue();
2620
2621 // If the element value is larger than the splat value, check if it consists
2622 // of a repeated bit pattern of size ByteSize.
2623 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2624 return SDValue();
2625
2626 // Properly sign extend the value.
2627 int MaskVal = SignExtend32(Value, ByteSize * 8);
2628
2629 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2630 if (MaskVal == 0) return SDValue();
2631
2632 // Finally, if this value fits in a 5 bit sext field, return it
2633 if (SignExtend32<5>(MaskVal) == MaskVal)
2634 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2635 return SDValue();
2636}
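// Illustrative example (not from the upstream source): a v16i8 build_vector
// whose lanes are all -3, queried with ByteSize == 1, yields the target
// constant -3 (lowerable as vspltisb -3); an all-zero vector is deliberately
// rejected here because it is matched by ISD::isBuildVectorAllZeros and
// handled with vxor instead.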
2637
2638//===----------------------------------------------------------------------===//
2639// Addressing Mode Selection
2640//===----------------------------------------------------------------------===//
2641
2642/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2643/// or 64-bit immediate, and if the value can be accurately represented as a
2644/// sign extension from a 16-bit value. If so, this returns true and the
2645/// immediate.
2646bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2647 if (!isa<ConstantSDNode>(N))
2648 return false;
2649
2650 Imm = (int16_t)N->getAsZExtVal();
2651 if (N->getValueType(0) == MVT::i32)
2652 return Imm == (int32_t)N->getAsZExtVal();
2653 else
2654 return Imm == (int64_t)N->getAsZExtVal();
2655}
2656bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2657  return isIntS16Immediate(Op.getNode(), Imm);
2658}
2659
2660/// Used when computing address flags for selecting loads and stores.
2661/// If we have an OR, check if the LHS and RHS are provably disjoint.
2662/// An OR of two provably disjoint values is equivalent to an ADD.
2663/// Most PPC load/store instructions compute the effective address as a sum,
2664/// so doing this conversion is useful.
2665static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2666 if (N.getOpcode() != ISD::OR)
2667 return false;
2668 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2669 if (!LHSKnown.Zero.getBoolValue())
2670 return false;
2671 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2672 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2673}
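// Illustrative example (not from the upstream source): for (x << 4) | 3, the
// low four bits of the left operand are known zero and the constant 3
// occupies only those bits, so every bit position is zero in at least one
// operand, no carry can occur, and the OR can be treated as an ADD when
// forming reg+reg or reg+imm addresses.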
2674
2675/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2676/// be represented as an indexed [r+r] operation.
2677bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2678                                               SDValue &Index,
2679 SelectionDAG &DAG) const {
2680 for (SDNode *U : N->users()) {
2681 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2682 if (Memop->getMemoryVT() == MVT::f64) {
2683 Base = N.getOperand(0);
2684 Index = N.getOperand(1);
2685 return true;
2686 }
2687 }
2688 }
2689 return false;
2690}
2691
2692/// isIntS34Immediate - This method tests whether the value of the given node
2693/// can be accurately represented as a sign extension from a 34-bit value. If so,
2694/// this returns true and the immediate.
2695bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2696 if (!isa<ConstantSDNode>(N))
2697 return false;
2698
2699 Imm = (int64_t)cast<ConstantSDNode>(N)->getSExtValue();
2700 return isInt<34>(Imm);
2701}
2702bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2703  return isIntS34Immediate(Op.getNode(), Imm);
2704}
2705
2706/// SelectAddressRegReg - Given the specified address, check to see if it
2707/// can be represented as an indexed [r+r] operation. Returns false if it
2708/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2709/// non-zero and N can be represented by a base register plus a signed 16-bit
2710/// displacement, make a more precise judgement by checking (displacement % \p
2711/// EncodingAlignment).
2712bool PPCTargetLowering::SelectAddressRegReg(
2713    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2714 MaybeAlign EncodingAlignment) const {
2715 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2716 // a [pc+imm].
2717  if (SelectAddressPCRel(N, Base))
2718    return false;
2719
2720 int16_t Imm = 0;
2721 if (N.getOpcode() == ISD::ADD) {
2722    // SPE f64 loads/stores cannot handle a 16-bit offset; they only support
2723    // 8-bit offsets, so check for the EVX reg+reg form first.
2724 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2725 return true;
2726 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2727 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2728 return false; // r+i
2729 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2730 return false; // r+i
2731
2732 Base = N.getOperand(0);
2733 Index = N.getOperand(1);
2734 return true;
2735 } else if (N.getOpcode() == ISD::OR) {
2736 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2737 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2738 return false; // r+i can fold it if we can.
2739
2740 // If this is an or of disjoint bitfields, we can codegen this as an add
2741 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2742 // disjoint.
2743 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2744
2745 if (LHSKnown.Zero.getBoolValue()) {
2746 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2747 // If all of the bits are known zero on the LHS or RHS, the add won't
2748 // carry.
2749 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2750 Base = N.getOperand(0);
2751 Index = N.getOperand(1);
2752 return true;
2753 }
2754 }
2755 }
2756
2757 return false;
2758}
2759
2760// If we happen to be doing an i64 load or store into a stack slot that has
2761// less than a 4-byte alignment, then the frame-index elimination may need to
2762// use an indexed load or store instruction (because the offset may not be a
2763// multiple of 4). The extra register needed to hold the offset comes from the
2764// register scavenger, and it is possible that the scavenger will need to use
2765// an emergency spill slot. As a result, we need to make sure that a spill slot
2766// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2767// stack slot.
2768static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2769 // FIXME: This does not handle the LWA case.
2770 if (VT != MVT::i64)
2771 return;
2772
2773 // NOTE: We'll exclude negative FIs here, which come from argument
2774 // lowering, because there are no known test cases triggering this problem
2775 // using packed structures (or similar). We can remove this exclusion if
2776 // we find such a test case. The reason why this is so test-case driven is
2777 // because this entire 'fixup' is only to prevent crashes (from the
2778 // register scavenger) on not-really-valid inputs. For example, if we have:
2779 // %a = alloca i1
2780 // %b = bitcast i1* %a to i64*
2781  //   store i64 0, i64* %b
2782 // then the store should really be marked as 'align 1', but is not. If it
2783 // were marked as 'align 1' then the indexed form would have been
2784 // instruction-selected initially, and the problem this 'fixup' is preventing
2785 // won't happen regardless.
2786 if (FrameIdx < 0)
2787 return;
2788
2789  MachineFunction &MF = DAG.getMachineFunction();
2790  MachineFrameInfo &MFI = MF.getFrameInfo();
2791
2792 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2793 return;
2794
2795 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2796 FuncInfo->setHasNonRISpills();
2797}
2798
2799/// Returns true if the address N can be represented by a base register plus
2800/// a signed 16-bit displacement [r+imm], and if it is not better
2801/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2802/// displacements that are multiples of that value.
2803bool PPCTargetLowering::SelectAddressRegImm(
2804    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2805 MaybeAlign EncodingAlignment) const {
2806 // FIXME dl should come from parent load or store, not from address
2807 SDLoc dl(N);
2808
2809 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2810 // a [pc+imm].
2811  if (SelectAddressPCRel(N, Base))
2812    return false;
2813
2814 // If this can be more profitably realized as r+r, fail.
2815 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2816 return false;
2817
2818 if (N.getOpcode() == ISD::ADD) {
2819 int16_t imm = 0;
2820 if (isIntS16Immediate(N.getOperand(1), imm) &&
2821 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2822 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2823 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2824 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2825 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2826 } else {
2827 Base = N.getOperand(0);
2828 }
2829 return true; // [r+i]
2830 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2831 // Match LOAD (ADD (X, Lo(G))).
2832 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2833 "Cannot handle constant offsets yet!");
2834 Disp = N.getOperand(1).getOperand(0); // The global address.
2835      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2836             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2837             Disp.getOpcode() == ISD::TargetConstantPool ||
2838             Disp.getOpcode() == ISD::TargetJumpTable);
2839      Base = N.getOperand(0);
2840 return true; // [&g+r]
2841 }
2842 } else if (N.getOpcode() == ISD::OR) {
2843 int16_t imm = 0;
2844 if (isIntS16Immediate(N.getOperand(1), imm) &&
2845 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2846 // If this is an or of disjoint bitfields, we can codegen this as an add
2847 // (for better address arithmetic) if the LHS and RHS of the OR are
2848 // provably disjoint.
2849 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2850
2851 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2852 // If all of the bits are known zero on the LHS or RHS, the add won't
2853 // carry.
2854 if (FrameIndexSDNode *FI =
2855 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2856 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2857 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2858 } else {
2859 Base = N.getOperand(0);
2860 }
2861 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2862 return true;
2863 }
2864 }
2865 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2866 // Loading from a constant address.
2867
2868 // If this address fits entirely in a 16-bit sext immediate field, codegen
2869 // this as "d, 0"
2870 int16_t Imm;
2871 if (isIntS16Immediate(CN, Imm) &&
2872 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2873 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2874 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2875 CN->getValueType(0));
2876 return true;
2877 }
2878
2879 // Handle 32-bit sext immediates with LIS + addr mode.
2880 if ((CN->getValueType(0) == MVT::i32 ||
2881 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2882 (!EncodingAlignment ||
2883 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2884 int Addr = (int)CN->getZExtValue();
2885
2886 // Otherwise, break this down into an LIS + disp.
2887 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2888
2889 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2890 MVT::i32);
2891 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2892 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2893 return true;
2894 }
2895 }
2896
2897 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2898 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2899 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2900 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2901 } else
2902 Base = N;
2903 return true; // [r+0]
2904}
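// Illustrative example (not from the upstream source): for the constant
// address 0x12348000, the low 16 bits sign-extend to -32768, so the
// displacement is -32768 and the base becomes
// (0x12348000 - (-32768)) >> 16 = 0x1235, materialized with LIS; the final
// address 0x12350000 + (-32768) reproduces 0x12348000 exactly.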
2905
2906/// Similar to the 16-bit case but for instructions that take a 34-bit
2907/// displacement field (prefixed loads/stores).
2908bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2909                                              SDValue &Base,
2910 SelectionDAG &DAG) const {
2911 // Only on 64-bit targets.
2912 if (N.getValueType() != MVT::i64)
2913 return false;
2914
2915 SDLoc dl(N);
2916 int64_t Imm = 0;
2917
2918 if (N.getOpcode() == ISD::ADD) {
2919 if (!isIntS34Immediate(N.getOperand(1), Imm))
2920 return false;
2921 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2922 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2923 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2924 else
2925 Base = N.getOperand(0);
2926 return true;
2927 }
2928
2929 if (N.getOpcode() == ISD::OR) {
2930 if (!isIntS34Immediate(N.getOperand(1), Imm))
2931 return false;
2932 // If this is an or of disjoint bitfields, we can codegen this as an add
2933 // (for better address arithmetic) if the LHS and RHS of the OR are
2934 // provably disjoint.
2935 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2936 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2937 return false;
2938 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2939 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2940 else
2941 Base = N.getOperand(0);
2942 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2943 return true;
2944 }
2945
2946 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2947 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2948 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2949 return true;
2950 }
2951
2952 return false;
2953}
2954
2955/// SelectAddressRegRegOnly - Given the specified address, force it to be
2956/// represented as an indexed [r+r] operation.
2957bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2958                                                SDValue &Index,
2959 SelectionDAG &DAG) const {
2960 // Check to see if we can easily represent this as an [r+r] address. This
2961 // will fail if it thinks that the address is more profitably represented as
2962 // reg+imm, e.g. where imm = 0.
2963 if (SelectAddressRegReg(N, Base, Index, DAG))
2964 return true;
2965
2966 // If the address is the result of an add, we will utilize the fact that the
2967 // address calculation includes an implicit add. However, we can reduce
2968 // register pressure if we do not materialize a constant just for use as the
2969 // index register. We only get rid of the add if it is not an add of a
2970 // value and a 16-bit signed constant and both have a single use.
2971 int16_t imm = 0;
2972 if (N.getOpcode() == ISD::ADD &&
2973 (!isIntS16Immediate(N.getOperand(1), imm) ||
2974 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2975 Base = N.getOperand(0);
2976 Index = N.getOperand(1);
2977 return true;
2978 }
2979
2980 // Otherwise, do it the hard way, using R0 as the base register.
2981 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2982 N.getValueType());
2983 Index = N;
2984 return true;
2985}
2986
2987template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2988 Ty *PCRelCand = dyn_cast<Ty>(N);
2989 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2990}
2991
2992/// Returns true if this address is a PC Relative address.
2993/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2994/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2995bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2996  // This is a materialize PC Relative node. Always select this as PC Relative.
2997 Base = N;
2998 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2999 return true;
3000 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3001 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3002 isValidPCRelNode<JumpTableSDNode>(N) ||
3003 isValidPCRelNode<BlockAddressSDNode>(N))
3004 return true;
3005 return false;
3006}
3007
3008/// Returns true if we should use a direct load into vector instruction
3009/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3010static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3011
3012 // If there are any other uses other than scalar to vector, then we should
3013 // keep it as a scalar load -> direct move pattern to prevent multiple
3014 // loads.
3015 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3016 if (!LD)
3017 return false;
3018
3019 EVT MemVT = LD->getMemoryVT();
3020 if (!MemVT.isSimple())
3021 return false;
3022 switch(MemVT.getSimpleVT().SimpleTy) {
3023 case MVT::i64:
3024 break;
3025 case MVT::i32:
3026 if (!ST.hasP8Vector())
3027 return false;
3028 break;
3029 case MVT::i16:
3030 case MVT::i8:
3031 if (!ST.hasP9Vector())
3032 return false;
3033 break;
3034 default:
3035 return false;
3036 }
3037
3038 SDValue LoadedVal(N, 0);
3039 if (!LoadedVal.hasOneUse())
3040 return false;
3041
3042 for (SDUse &Use : LD->uses())
3043 if (Use.getResNo() == 0 &&
3044 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3045        Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3046      return false;
3047
3048 return true;
3049}
3050
3051/// getPreIndexedAddressParts - returns true, and sets the base pointer, offset
3052/// pointer and addressing mode by reference, if the node's address can be
3053/// legally represented as a pre-indexed load / store address.
3054bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3055                                                  SDValue &Offset,
3056                                                  ISD::MemIndexedMode &AM,
3057                                                  SelectionDAG &DAG) const {
3058 if (DisablePPCPreinc) return false;
3059
3060 bool isLoad = true;
3061 SDValue Ptr;
3062 EVT VT;
3063 Align Alignment;
3064 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3065 Ptr = LD->getBasePtr();
3066 VT = LD->getMemoryVT();
3067 Alignment = LD->getAlign();
3068 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3069 Ptr = ST->getBasePtr();
3070 VT = ST->getMemoryVT();
3071 Alignment = ST->getAlign();
3072 isLoad = false;
3073 } else
3074 return false;
3075
3076 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3077 // instructions because we can fold these into a more efficient instruction
3078 // instead, (such as LXSD).
3079 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3080 return false;
3081 }
3082
3083 // PowerPC doesn't have preinc load/store instructions for vectors
3084 if (VT.isVector())
3085 return false;
3086
3087 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3088 // Common code will reject creating a pre-inc form if the base pointer
3089 // is a frame index, or if N is a store and the base pointer is either
3090 // the same as or a predecessor of the value being stored. Check for
3091 // those situations here, and try with swapped Base/Offset instead.
3092 bool Swap = false;
3093
3094 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3095 Swap = true;
3096 else if (!isLoad) {
3097 SDValue Val = cast<StoreSDNode>(N)->getValue();
3098 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3099 Swap = true;
3100 }
3101
3102 if (Swap)
3103      std::swap(Base, Offset);
3104
3105 AM = ISD::PRE_INC;
3106 return true;
3107 }
3108
3109 // LDU/STU can only handle immediates that are a multiple of 4.
3110 if (VT != MVT::i64) {
3111 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3112 return false;
3113 } else {
3114 // LDU/STU need an address with at least 4-byte alignment.
3115 if (Alignment < Align(4))
3116 return false;
3117
3118 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3119 return false;
3120 }
3121
3122 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3123 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3124 // sext i32 to i64 when addr mode is r+i.
3125 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3126 LD->getExtensionType() == ISD::SEXTLOAD &&
3127 isa<ConstantSDNode>(Offset))
3128 return false;
3129 }
3130
3131 AM = ISD::PRE_INC;
3132 return true;
3133}
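// Illustrative example (not from the upstream source): when the incremented
// pointer of a store to ptr+8 is reused, the PRE_INC form selected here lets
// the backend emit an update-form instruction such as "stwu r5, 8(r1)",
// which performs the store and leaves r1 pointing at the new address, so the
// separate add disappears.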
3134
3135//===----------------------------------------------------------------------===//
3136// LowerOperation implementation
3137//===----------------------------------------------------------------------===//
3138
3139/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3140/// and LoOpFlags to the target MO flags.
3141static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3142 unsigned &HiOpFlags, unsigned &LoOpFlags,
3143 const GlobalValue *GV = nullptr) {
3144 HiOpFlags = PPCII::MO_HA;
3145 LoOpFlags = PPCII::MO_LO;
3146
3147 // Don't use the pic base if not in PIC relocation model.
3148 if (IsPIC) {
3149 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3150 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3151 }
3152}
3153
3154static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3155 SelectionDAG &DAG) {
3156 SDLoc DL(HiPart);
3157 EVT PtrVT = HiPart.getValueType();
3158 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3159
3160 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3161 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3162
3163 // With PIC, the first instruction is actually "GR+hi(&G)".
3164 if (isPIC)
3165 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3166 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3167
3168 // Generate non-pic code that has direct accesses to the constant pool.
3169 // The address of the global is just (hi(&g)+lo(&g)).
3170 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3171}
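// Illustrative example (not from the upstream source): the Hi/Lo pair
// typically lowers to "addis rT, rX, sym@ha" followed by "addi rD, rT,
// sym@l"; the @ha half compensates for the sign extension of the low 16
// bits, so the two halves always sum back to the full address.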
3172
3173static void setUsesTOCBasePtr(MachineFunction &MF) {
3174  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3175 FuncInfo->setUsesTOCBasePtr();
3176}
3177
3178static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3179  setUsesTOCBasePtr(DAG.getMachineFunction());
3180}
3181
3182SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3183 SDValue GA) const {
3184 EVT VT = Subtarget.getScalarIntVT();
3185 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3186 : Subtarget.isAIXABI()
3187 ? DAG.getRegister(PPC::R2, VT)
3188 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3189 SDValue Ops[] = { GA, Reg };
3190 return DAG.getMemIntrinsicNode(
3191 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3192      MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3193      MachineMemOperand::MOLoad);
3194}
3195
3196SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3197 SelectionDAG &DAG) const {
3198 EVT PtrVT = Op.getValueType();
3199 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3200 const Constant *C = CP->getConstVal();
3201
3202 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3203 // The actual address of the GlobalValue is stored in the TOC.
3204 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3205 if (Subtarget.isUsingPCRelativeCalls()) {
3206 SDLoc DL(CP);
3207 EVT Ty = getPointerTy(DAG.getDataLayout());
3208 SDValue ConstPool = DAG.getTargetConstantPool(
3209 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3210 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3211 }
3212 setUsesTOCBasePtr(DAG);
3213 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3214 return getTOCEntry(DAG, SDLoc(CP), GA);
3215 }
3216
3217 unsigned MOHiFlag, MOLoFlag;
3218 bool IsPIC = isPositionIndependent();
3219 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3220
3221 if (IsPIC && Subtarget.isSVR4ABI()) {
3222 SDValue GA =
3223 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3224 return getTOCEntry(DAG, SDLoc(CP), GA);
3225 }
3226
3227 SDValue CPIHi =
3228 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3229 SDValue CPILo =
3230 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3231 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3232}
3233
3234// For 64-bit PowerPC, prefer the more compact relative encodings.
3235// This trades 32 bits per jump table entry for one or two instructions
3236// on the jump site.
3237unsigned PPCTargetLowering::getJumpTableEncoding() const {
3238  if (isJumpTableRelative())
3239    return MachineJumpTableInfo::EK_LabelDifference32;
3240
3241  return TargetLowering::getJumpTableEncoding();
3242}
3243
3244bool PPCTargetLowering::isJumpTableRelative() const {
3245  if (UseAbsoluteJumpTables)
3246    return false;
3247  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3248    return true;
3249  return TargetLowering::isJumpTableRelative();
3250}
3251
3252SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3253                                                    SelectionDAG &DAG) const {
3254  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3255    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3256
3257  switch (getTargetMachine().getCodeModel()) {
3258  case CodeModel::Small:
3259  case CodeModel::Medium:
3260    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3261  default:
3262    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3263                       getPointerTy(DAG.getDataLayout()));
3264  }
3265}
3266
3267const MCExpr *
3268PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3269                                                unsigned JTI,
3270                                                MCContext &Ctx) const {
3271  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3272    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3273
3274  switch (getTargetMachine().getCodeModel()) {
3275  case CodeModel::Small:
3276  case CodeModel::Medium:
3277    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3278  default:
3279    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3280  }
3281}
3282
3283SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3284 EVT PtrVT = Op.getValueType();
3285 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3286
3287 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3288 if (Subtarget.isUsingPCRelativeCalls()) {
3289 SDLoc DL(JT);
3290 EVT Ty = getPointerTy(DAG.getDataLayout());
3291 SDValue GA =
3292 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3293 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3294 return MatAddr;
3295 }
3296
3297 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3298 // The actual address of the GlobalValue is stored in the TOC.
3299 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3300 setUsesTOCBasePtr(DAG);
3301 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3302 return getTOCEntry(DAG, SDLoc(JT), GA);
3303 }
3304
3305 unsigned MOHiFlag, MOLoFlag;
3306 bool IsPIC = isPositionIndependent();
3307 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3308
3309 if (IsPIC && Subtarget.isSVR4ABI()) {
3310 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3311                                        PPCII::MO_PIC_FLAG);
3312    return getTOCEntry(DAG, SDLoc(GA), GA);
3313 }
3314
3315 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3316 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3317 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3318}
3319
3320SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3321 SelectionDAG &DAG) const {
3322 EVT PtrVT = Op.getValueType();
3323 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3324 const BlockAddress *BA = BASDN->getBlockAddress();
3325
3326 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3327 if (Subtarget.isUsingPCRelativeCalls()) {
3328 SDLoc DL(BASDN);
3329 EVT Ty = getPointerTy(DAG.getDataLayout());
3330 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3331                                           PPCII::MO_PCREL_FLAG);
3332    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3333 return MatAddr;
3334 }
3335
3336 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3337 // The actual BlockAddress is stored in the TOC.
3338 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3339 setUsesTOCBasePtr(DAG);
3340 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3341 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3342 }
3343
3344 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3345 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3346 return getTOCEntry(
3347 DAG, SDLoc(BASDN),
3348 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3349
3350 unsigned MOHiFlag, MOLoFlag;
3351 bool IsPIC = isPositionIndependent();
3352 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3353 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3354 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3355 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3356}
3357
3358SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3359 SelectionDAG &DAG) const {
3360 if (Subtarget.isAIXABI())
3361 return LowerGlobalTLSAddressAIX(Op, DAG);
3362
3363 return LowerGlobalTLSAddressLinux(Op, DAG);
3364}
3365
3366/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3367/// and then apply the update.
3368static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3369                                         SelectionDAG &DAG,
3370 const TargetMachine &TM) {
3371 // Initialize TLS model opt setting lazily:
3372 // (1) Use initial-exec for single TLS var references within current function.
3373 // (2) Use local-dynamic for multiple TLS var references within current
3374 // function.
3375 PPCFunctionInfo *FuncInfo =
3376      DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3377  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3378    SmallPtrSet<const GlobalValue *, 8> TLSGV;
3379    // Iterate over all instructions within current function, collect all TLS
3380 // global variables (global variables taken as the first parameter to
3381 // Intrinsic::threadlocal_address).
3382 const Function &Func = DAG.getMachineFunction().getFunction();
3383 for (const BasicBlock &BB : Func)
3384 for (const Instruction &I : BB)
3385 if (I.getOpcode() == Instruction::Call)
3386 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3387 if (Function *CF = CI->getCalledFunction())
3388 if (CF->isDeclaration() &&
3389 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3390 if (const GlobalValue *GV =
3391 dyn_cast<GlobalValue>(I.getOperand(0))) {
3392 TLSModel::Model GVModel = TM.getTLSModel(GV);
3393 if (GVModel == TLSModel::LocalDynamic)
3394 TLSGV.insert(GV);
3395 }
3396
3397 unsigned TLSGVCnt = TLSGV.size();
3398 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3399 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3400 FuncInfo->setAIXFuncUseTLSIEForLD();
3401    FuncInfo->setAIXFuncTLSModelOptInitDone();
3402  }
3403
3404 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3405 LLVM_DEBUG(
3406 dbgs() << DAG.getMachineFunction().getName()
3407 << " function is using the TLS-IE model for TLS-LD access.\n");
3408 Model = TLSModel::InitialExec;
3409 }
3410}
3411
3412SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3413 SelectionDAG &DAG) const {
3414 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3415
3416 if (DAG.getTarget().useEmulatedTLS())
3417 report_fatal_error("Emulated TLS is not yet supported on AIX");
3418
3419 SDLoc dl(GA);
3420 const GlobalValue *GV = GA->getGlobal();
3421 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3422 bool Is64Bit = Subtarget.isPPC64();
3423  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3424
3425 // Apply update to the TLS model.
3426 if (Subtarget.hasAIXShLibTLSModelOpt())
3427    updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3428
3429 // TLS variables are accessed through TOC entries.
3430 // To support this, set the DAG to use the TOC base pointer.
3431 setUsesTOCBasePtr(DAG);
3432
3433 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3434
3435 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3436 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3437 bool HasAIXSmallTLSGlobalAttr = false;
3438 SDValue VariableOffsetTGA =
3439 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3440 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3441 SDValue TLSReg;
3442
3443 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3444 if (GVar->hasAttribute("aix-small-tls"))
3445 HasAIXSmallTLSGlobalAttr = true;
3446
3447 if (Is64Bit) {
3448 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3449 // involves a load of the variable offset (from the TOC), followed by an
3450 // add of the loaded variable offset to R13 (the thread pointer).
3451 // This code sequence looks like:
3452 // ld reg1,var[TC](2)
3453 // add reg2, reg1, r13 // r13 contains the thread pointer
3454 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3455
3456 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3457 // global variable attribute, produce a faster access sequence for
3458 // local-exec TLS variables where the offset from the TLS base is encoded
3459 // as an immediate operand.
3460 //
3461 // We only utilize the faster local-exec access sequence when the TLS
3462 // variable has a size within the policy limit. We treat types that are
3463 // not sized or are empty as being over the policy size limit.
3464 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3465 IsTLSLocalExecModel) {
3466 Type *GVType = GV->getValueType();
3467 if (GVType->isSized() && !GVType->isEmptyTy() &&
3468 GV->getDataLayout().getTypeAllocSize(GVType) <=
3469              AIXSmallTlsPolicySizeLimit)
3470          return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3471 }
3472 } else {
3473 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3474 // involves loading the variable offset from the TOC, generating a call to
3475 // .__get_tpointer to get the thread pointer (which will be in R3), and
3476 // adding the two together:
3477 // lwz reg1,var[TC](2)
3478 // bla .__get_tpointer
3479 // add reg2, reg1, r3
3480 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3481
3482 // We do not implement the 32-bit version of the faster access sequence
3483 // for local-exec that is controlled by the -maix-small-local-exec-tls
3484 // option, or the "aix-small-tls" global variable attribute.
3485 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3486 report_fatal_error("The small-local-exec TLS access sequence is "
3487 "currently only supported on AIX (64-bit mode).");
3488 }
3489 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3490 }
3491
3492 if (Model == TLSModel::LocalDynamic) {
3493 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3494
3495 // We do not implement the 32-bit version of the faster access sequence
3496 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3497 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3498 report_fatal_error("The small-local-dynamic TLS access sequence is "
3499 "currently only supported on AIX (64-bit mode).");
3500
3501 // For local-dynamic on AIX, we need to generate one TOC entry for each
3502 // variable offset, and a single module-handle TOC entry for the entire
3503 // file.
3504
3505 SDValue VariableOffsetTGA =
3506 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3507 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3508
3509    Module *M = DAG.getMachineFunction().getFunction().getParent();
3510    GlobalVariable *TLSGV =
3511 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3512 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3513    TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3514    assert(TLSGV && "Not able to create GV for _$TLSML.");
3515 SDValue ModuleHandleTGA =
3516 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3517 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3518 SDValue ModuleHandle =
3519 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3520
3521 // With the -maix-small-local-dynamic-tls option, produce a faster access
3522 // sequence for local-dynamic TLS variables where the offset from the
3523 // module-handle is encoded as an immediate operand.
3524 //
3525 // We only utilize the faster local-dynamic access sequence when the TLS
3526 // variable has a size within the policy limit. We treat types that are
3527 // not sized or are empty as being over the policy size limit.
3528 if (HasAIXSmallLocalDynamicTLS) {
3529 Type *GVType = GV->getValueType();
3530 if (GVType->isSized() && !GVType->isEmptyTy() &&
3531 GV->getDataLayout().getTypeAllocSize(GVType) <=
3532              AIXSmallTlsPolicySizeLimit)
3533        return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3534 ModuleHandle);
3535 }
3536
3537 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3538 }
3539
3540 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3541 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3542 // need to generate two TOC entries, one for the variable offset, one for the
3543 // region handle. The global address for the TOC entry of the region handle is
3544 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3545 // entry of the variable offset is created with MO_TLSGD_FLAG.
3546 SDValue VariableOffsetTGA =
3547 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3548 SDValue RegionHandleTGA =
3549 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3550 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3551 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3552 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3553 RegionHandle);
3554}
3555
3556SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3557 SelectionDAG &DAG) const {
3558 // FIXME: TLS addresses currently use medium model code sequences,
3559 // which is the most useful form. Eventually support for small and
3560 // large models could be added if users need it, at the cost of
3561 // additional complexity.
3562 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3563 if (DAG.getTarget().useEmulatedTLS())
3564 return LowerToTLSEmulatedModel(GA, DAG);
3565
3566 SDLoc dl(GA);
3567 const GlobalValue *GV = GA->getGlobal();
3568 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3569 bool is64bit = Subtarget.isPPC64();
3570 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3571 PICLevel::Level picLevel = M->getPICLevel();
3572
3573  const TargetMachine &TM = getTargetMachine();
3574  TLSModel::Model Model = TM.getTLSModel(GV);
3575
3576 if (Model == TLSModel::LocalExec) {
3577 if (Subtarget.isUsingPCRelativeCalls()) {
3578 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3579 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3580                                               PPCII::MO_TPREL_PCREL_FLAG);
3581      SDValue MatAddr =
3582 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3583 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3584 }
3585
3586 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3587                                               PPCII::MO_TPREL_HA);
3588    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3589                                               PPCII::MO_TPREL_LO);
3590    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3591 : DAG.getRegister(PPC::R2, MVT::i32);
3592
3593 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3594 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3595 }
3596
3597 if (Model == TLSModel::InitialExec) {
3598 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3599    SDValue TGA = DAG.getTargetGlobalAddress(
3600        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3601 SDValue TGATLS = DAG.getTargetGlobalAddress(
3602 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3603 SDValue TPOffset;
3604 if (IsPCRel) {
3605 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3606 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3607                             MachinePointerInfo());
3608    } else {
3609 SDValue GOTPtr;
3610 if (is64bit) {
3611 setUsesTOCBasePtr(DAG);
3612 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3613 GOTPtr =
3614 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3615 } else {
3616 if (!TM.isPositionIndependent())
3617 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3618 else if (picLevel == PICLevel::SmallPIC)
3619 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3620 else
3621 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3622 }
3623 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3624 }
3625 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3626 }
3627
3628 if (Model == TLSModel::GeneralDynamic) {
3629 if (Subtarget.isUsingPCRelativeCalls()) {
3630 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3631                                               PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3632      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3633 }
3634
3635 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3636 SDValue GOTPtr;
3637 if (is64bit) {
3638 setUsesTOCBasePtr(DAG);
3639 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3640 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3641 GOTReg, TGA);
3642 } else {
3643 if (picLevel == PICLevel::SmallPIC)
3644 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3645 else
3646 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3647 }
3648 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3649 GOTPtr, TGA, TGA);
3650 }
3651
3652 if (Model == TLSModel::LocalDynamic) {
3653 if (Subtarget.isUsingPCRelativeCalls()) {
3654 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3655                                               PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3656      SDValue MatPCRel =
3657 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3658 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3659 }
3660
3661 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3662 SDValue GOTPtr;
3663 if (is64bit) {
3664 setUsesTOCBasePtr(DAG);
3665 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3666 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3667 GOTReg, TGA);
3668 } else {
3669 if (picLevel == PICLevel::SmallPIC)
3670 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3671 else
3672 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3673 }
3674 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3675 PtrVT, GOTPtr, TGA, TGA);
3676 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3677 PtrVT, TLSAddr, TGA);
3678 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3679 }
3680
3681 llvm_unreachable("Unknown TLS model!");
3682}
3683
3684SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3685 SelectionDAG &DAG) const {
3686 EVT PtrVT = Op.getValueType();
3687 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3688 SDLoc DL(GSDN);
3689 const GlobalValue *GV = GSDN->getGlobal();
3690
3691 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3692 // The actual address of the GlobalValue is stored in the TOC.
3693 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3694 if (Subtarget.isUsingPCRelativeCalls()) {
3695 EVT Ty = getPointerTy(DAG.getDataLayout());
3696      if (isAccessedAsGotIndirect(Op)) {
3697        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3698                                                PPCII::MO_GOT_PCREL_FLAG);
3699        SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3700        SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3701                                   MachinePointerInfo());
3702        return Load;
3703      } else {
3704        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3705                                                PPCII::MO_PCREL_FLAG);
3706        return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3707 }
3708 }
3709 setUsesTOCBasePtr(DAG);
3710 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3711 return getTOCEntry(DAG, DL, GA);
3712 }
3713
3714 unsigned MOHiFlag, MOLoFlag;
3715 bool IsPIC = isPositionIndependent();
3716 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3717
3718 if (IsPIC && Subtarget.isSVR4ABI()) {
3719 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3720 GSDN->getOffset(),
3721 PPCII::MO_PIC_FLAG);
3722 return getTOCEntry(DAG, DL, GA);
3723 }
3724
3725 SDValue GAHi =
3726 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3727 SDValue GALo =
3728 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3729
3730 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3731}
3732
3733SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3734 bool IsStrict = Op->isStrictFPOpcode();
3735 ISD::CondCode CC =
3736 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3737 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3738 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3739 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3740 EVT LHSVT = LHS.getValueType();
3741 SDLoc dl(Op);
3742
3743 // Soften the setcc with a libcall if it is fp128.
3744 if (LHSVT == MVT::f128) {
3745 assert(!Subtarget.hasP9Vector() &&
3746 "SETCC for f128 is already legal under Power9!");
3747 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3748 Op->getOpcode() == ISD::STRICT_FSETCCS);
3749 if (RHS.getNode())
3750 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3751 DAG.getCondCode(CC));
3752 if (IsStrict)
3753 return DAG.getMergeValues({LHS, Chain}, dl);
3754 return LHS;
3755 }
3756
3757 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3758
3759 if (Op.getValueType() == MVT::v2i64) {
3760 // When the operands themselves are v2i64 values, we need to do something
3761 // special because VSX has no underlying comparison operations for these.
3762 if (LHS.getValueType() == MVT::v2i64) {
3763 // Equality can be handled by casting to the legal type for Altivec
3764 // comparisons, everything else needs to be expanded.
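// Concretely: both operands are bitcast to v4i32 and compared word-wise; a
// 64-bit element is equal iff both of its 32-bit halves are equal, so the
// v4i32 result is combined with its word-swapped self using AND (or OR for
// SETNE, where either differing half makes the element not equal).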
3765 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3766 return SDValue();
3767 SDValue SetCC32 = DAG.getSetCC(
3768 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3769 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3770 int ShuffV[] = {1, 0, 3, 2};
3771 SDValue Shuff =
3772 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3773 return DAG.getBitcast(MVT::v2i64,
3774 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3775 dl, MVT::v4i32, Shuff, SetCC32));
3776 }
3777
3778 // We handle most of these in the usual way.
3779 return Op;
3780 }
3781
3782 // If we're comparing for equality to zero, expose the fact that this is
3783 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3784 // fold the new nodes.
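// For example, for a 32-bit value the combine can turn
//   (setcc %x, 0, seteq)
// into
//   (srl (ctlz %x), 5)
// since ctlz yields 32 only when %x is zero.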
3785 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3786 return V;
3787
3788 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3789 // Leave comparisons against 0 and -1 alone for now, since they're usually
3790 // optimized. FIXME: revisit this when we can custom lower all setcc
3791 // optimizations.
3792 if (C->isAllOnes() || C->isZero())
3793 return SDValue();
3794 }
3795
3796 // If we have an integer seteq/setne, turn it into a compare against zero
3797 // by xor'ing the rhs with the lhs, which is faster than setting a
3798 // condition register, reading it back out, and masking the correct bit. The
3799 // normal approach here uses sub to do this instead of xor. Using xor exposes
3800 // the result to other bit-twiddling opportunities.
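// For example, (seteq %a, %b) becomes (seteq (xor %a, %b), 0), which is what
// the code below emits.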
3801 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3802 EVT VT = Op.getValueType();
3803 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3804 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3805 }
3806 return SDValue();
3807}
3808
3809SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3810 SDNode *Node = Op.getNode();
3811 EVT VT = Node->getValueType(0);
3812 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3813 SDValue InChain = Node->getOperand(0);
3814 SDValue VAListPtr = Node->getOperand(1);
3815 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3816 SDLoc dl(Node);
3817
3818 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3819
3820 // gpr_index
3821 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3822 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3823 InChain = GprIndex.getValue(1);
3824
3825 if (VT == MVT::i64) {
3826 // Check if GprIndex is even
3827 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3828 DAG.getConstant(1, dl, MVT::i32));
3829 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3830 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3831 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3832 DAG.getConstant(1, dl, MVT::i32));
3833 // Align GprIndex to be even if it isn't
3834 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3835 GprIndex);
3836 }
3837
3838 // fpr index is 1 byte after gpr
3839 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3840 DAG.getConstant(1, dl, MVT::i32));
3841
3842 // fpr
3843 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3844 FprPtr, MachinePointerInfo(SV), MVT::i8);
3845 InChain = FprIndex.getValue(1);
3846
3847 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3848 DAG.getConstant(8, dl, MVT::i32));
3849
3850 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3851 DAG.getConstant(4, dl, MVT::i32));
3852
3853 // areas
3854 SDValue OverflowArea =
3855 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3856 InChain = OverflowArea.getValue(1);
3857
3858 SDValue RegSaveArea =
3859 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3860 InChain = RegSaveArea.getValue(1);
3861
3862 // select overflow_area if index >= 8
3863 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3864 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3865
3866 // adjustment constant gpr_index * 4/8
3867 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3868 VT.isInteger() ? GprIndex : FprIndex,
3869 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3870 MVT::i32));
3871
3872 // OurReg = RegSaveArea + RegConstant
3873 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3874 RegConstant);
3875
3876 // Floating types are 32 bytes into RegSaveArea
3877 if (VT.isFloatingPoint())
3878 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3879 DAG.getConstant(32, dl, MVT::i32));
3880
3881 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3882 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3883 VT.isInteger() ? GprIndex : FprIndex,
3884 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3885 MVT::i32));
3886
3887 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3888 VT.isInteger() ? VAListPtr : FprPtr,
3889 MachinePointerInfo(SV), MVT::i8);
3890
3891 // determine if we should load from reg_save_area or overflow_area
3892 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3893
3894 // increase overflow_area by 4/8 if gpr/fpr index >= 8
3895 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3896 DAG.getConstant(VT.isInteger() ? 4 : 8,
3897 dl, MVT::i32));
3898
3899 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3900 OverflowAreaPlusN);
3901
3902 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3903 MachinePointerInfo(), MVT::i32);
3904
3905 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3906}
3907
3908SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3909 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3910
3911 // We have to copy the entire va_list struct:
3912 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
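// Byte layout of the copied struct (see the va_list definition in
// LowerVASTART below): gpr index at offset 0, fpr index at offset 1, two
// padding bytes, overflow_arg_area pointer at offset 4, and reg_save_area
// pointer at offset 8.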
3913 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3914 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3915 false, true, /*CI=*/nullptr, std::nullopt,
3916 MachinePointerInfo(), MachinePointerInfo());
3917}
3918
3919SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3920 SelectionDAG &DAG) const {
3921 return Op.getOperand(0);
3922}
3923
3924 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3925 PPCFunctionInfo &MFI =
3926 *DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3927
3928 assert((Op.getOpcode() == ISD::INLINEASM ||
3929 Op.getOpcode() == ISD::INLINEASM_BR) &&
3930 "Expecting Inline ASM node.");
3931
3932 // If an LR store is already known to be required then there is no point in
3933 // checking this ASM as well.
3934 if (MFI.isLRStoreRequired())
3935 return Op;
3936
3937 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3938 // type MVT::Glue. We want to ignore this last operand if that is the case.
3939 unsigned NumOps = Op.getNumOperands();
3940 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3941 --NumOps;
3942
3943 // Check all operands that may contain the LR.
3944 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3945 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3946 unsigned NumVals = Flags.getNumOperandRegisters();
3947 ++i; // Skip the ID value.
3948
3949 switch (Flags.getKind()) {
3950 default:
3951 llvm_unreachable("Bad flags!");
3952 case InlineAsm::Kind::RegUse:
3953 case InlineAsm::Kind::Imm:
3954 case InlineAsm::Kind::Mem:
3955 i += NumVals;
3956 break;
3957 case InlineAsm::Kind::Clobber:
3958 case InlineAsm::Kind::RegDef:
3959 case InlineAsm::Kind::RegDefEarlyClobber: {
3960 for (; NumVals; --NumVals, ++i) {
3961 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3962 if (Reg != PPC::LR && Reg != PPC::LR8)
3963 continue;
3964 MFI.setLRStoreRequired();
3965 return Op;
3966 }
3967 break;
3968 }
3969 }
3970 }
3971
3972 return Op;
3973}
3974
3975SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3976 SelectionDAG &DAG) const {
3977 SDValue Chain = Op.getOperand(0);
3978 SDValue Trmp = Op.getOperand(1); // trampoline
3979 SDValue FPtr = Op.getOperand(2); // nested function
3980 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3981 SDLoc dl(Op);
3982
3983 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3984
3985 if (Subtarget.isAIXABI()) {
3986 // On AIX we create a trampoline descriptor by combining the
3987 // entry point and TOC from the global descriptor (FPtr) with the
3988 // nest argument as the environment pointer.
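// The resulting trampoline buffer is itself laid out as a function
// descriptor: entry point at offset 0, TOC pointer at offset PointerSize,
// and the environment (nest) pointer at offset 2 * PointerSize.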
3989 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3990 MaybeAlign PointerAlign(PointerSize);
3991 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3992 ? (MachineMemOperand::MODereferenceable |
3993 MachineMemOperand::MOInvariant)
3994 : MachineMemOperand::MONone;
3995
3996 uint64_t TOCPointerOffset = 1 * PointerSize;
3997 uint64_t EnvPointerOffset = 2 * PointerSize;
3998 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3999 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
4000
4001 const Value *TrampolineAddr =
4002 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4003 const Function *Func =
4004 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
4005
4006 SDValue OutChains[3];
4007
4008 // Copy the entry point address from the global descriptor to the
4009 // trampoline buffer.
4010 SDValue LoadEntryPoint =
4011 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
4012 PointerAlign, MMOFlags);
4013 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
4014 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
4015 MachinePointerInfo(TrampolineAddr, 0));
4016
4017 // Copy the TOC pointer from the global descriptor to the trampoline
4018 // buffer.
4019 SDValue TOCFromDescriptorPtr =
4020 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
4021 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
4022 MachinePointerInfo(Func, TOCPointerOffset),
4023 PointerAlign, MMOFlags);
4024 SDValue TrampolineTOCPointer =
4025 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
4026 SDValue TOCLoadChain = TOCReg.getValue(1);
4027 OutChains[1] =
4028 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
4029 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
4030
4031 // Store the nest argument into the environment pointer in the trampoline
4032 // buffer.
4033 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
4034 OutChains[2] =
4035 DAG.getStore(Chain, dl, Nest, EnvPointer,
4036 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
4037
4038 SDValue TokenFactor =
4039 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
4040 return TokenFactor;
4041 }
4042
4043 bool isPPC64 = (PtrVT == MVT::i64);
4044 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
4045
4046 TargetLowering::ArgListTy Args;
4047 Args.emplace_back(Trmp, IntPtrTy);
4048 // TrampSize == (isPPC64 ? 48 : 40);
4049 Args.emplace_back(
4050 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
4051 IntPtrTy);
4052 Args.emplace_back(FPtr, IntPtrTy);
4053 Args.emplace_back(Nest, IntPtrTy);
4054
4055 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4056 TargetLowering::CallLoweringInfo CLI(DAG);
4057 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4058 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4059 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4060
4061 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4062 return CallResult.second;
4063}
4064
4065 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4066 MachineFunction &MF = DAG.getMachineFunction();
4067 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4068 EVT PtrVT = getPointerTy(MF.getDataLayout());
4069
4070 SDLoc dl(Op);
4071
4072 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4073 // vastart just stores the address of the VarArgsFrameIndex slot into the
4074 // memory location argument.
4075 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4076 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4077 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4078 MachinePointerInfo(SV));
4079 }
4080
4081 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4082 // We suppose the given va_list is already allocated.
4083 //
4084 // typedef struct {
4085 // char gpr; /* index into the array of 8 GPRs
4086 // * stored in the register save area
4087 // * gpr=0 corresponds to r3,
4088 // * gpr=1 to r4, etc.
4089 // */
4090 // char fpr; /* index into the array of 8 FPRs
4091 // * stored in the register save area
4092 // * fpr=0 corresponds to f1,
4093 // * fpr=1 to f2, etc.
4094 // */
4095 // char *overflow_arg_area;
4096 // /* location on stack that holds
4097 // * the next overflow argument
4098 // */
4099 // char *reg_save_area;
4100 // /* where r3:r10 and f1:f8 (if saved)
4101 // * are stored
4102 // */
4103 // } va_list[1];
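// The four stores below fill these fields in order: the gpr and fpr counts
// as single bytes at offsets 0 and 1, the overflow_arg_area pointer at
// offset 4 (skipping two bytes of padding), and the reg_save_area pointer
// at offset 8.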
4104
4105 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4106 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4107 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4108 PtrVT);
4109 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4110 PtrVT);
4111
4112 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4113 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4114
4115 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4116 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4117
4118 uint64_t FPROffset = 1;
4119 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4120
4121 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4122
4123 // Store first byte : number of int regs
4124 SDValue firstStore =
4125 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4126 MachinePointerInfo(SV), MVT::i8);
4127 uint64_t nextOffset = FPROffset;
4128 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4129 ConstFPROffset);
4130
4131 // Store second byte : number of float regs
4132 SDValue secondStore =
4133 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4134 MachinePointerInfo(SV, nextOffset), MVT::i8);
4135 nextOffset += StackOffset;
4136 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4137
4138 // Store second word : arguments given on stack
4139 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4140 MachinePointerInfo(SV, nextOffset));
4141 nextOffset += FrameOffset;
4142 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4143
4144 // Store third word : arguments given in registers
4145 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4146 MachinePointerInfo(SV, nextOffset));
4147}
4148
4149/// FPR - The set of FP registers that should be allocated for arguments
4150/// on Darwin and AIX.
4151static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4152 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4153 PPC::F11, PPC::F12, PPC::F13};
4154
4155/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4156/// the stack.
4157static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4158 unsigned PtrByteSize) {
4159 unsigned ArgSize = ArgVT.getStoreSize();
4160 if (Flags.isByVal())
4161 ArgSize = Flags.getByValSize();
4162
4163 // Round up to multiples of the pointer size, except for array members,
4164 // which are always packed.
4165 if (!Flags.isInConsecutiveRegs())
4166 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
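// e.g. with PtrByteSize == 8, a 13-byte byval argument reserves 16 bytes,
// whereas a 13-byte consecutive-register array member stays at 13 bytes.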
4167
4168 return ArgSize;
4169}
4170
4171/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4172 /// on the stack.
4173 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4174 ISD::ArgFlagsTy Flags,
4175 unsigned PtrByteSize) {
4176 Align Alignment(PtrByteSize);
4177
4178 // Altivec parameters are padded to a 16 byte boundary.
4179 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4180 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4181 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4182 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4183 Alignment = Align(16);
4184
4185 // ByVal parameters are aligned as requested.
4186 if (Flags.isByVal()) {
4187 auto BVAlign = Flags.getNonZeroByValAlign();
4188 if (BVAlign > PtrByteSize) {
4189 if (BVAlign.value() % PtrByteSize != 0)
4190 report_fatal_error(
4191 "ByVal alignment is not a multiple of the pointer size");
4192
4193 Alignment = BVAlign;
4194 }
4195 }
4196
4197 // Array members are always packed to their original alignment.
4198 if (Flags.isInConsecutiveRegs()) {
4199 // If the array member was split into multiple registers, the first
4200 // needs to be aligned to the size of the full type. (Except for
4201 // ppcf128, which is only aligned as its f64 components.)
4202 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4203 Alignment = Align(OrigVT.getStoreSize());
4204 else
4205 Alignment = Align(ArgVT.getStoreSize());
4206 }
4207
4208 return Alignment;
4209}
4210
4211/// CalculateStackSlotUsed - Return whether this argument will use its
4212/// stack slot (instead of being passed in registers). ArgOffset,
4213/// AvailableFPRs, and AvailableVRs must hold the current argument
4214/// position, and will be updated to account for this argument.
4215static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4216 unsigned PtrByteSize, unsigned LinkageSize,
4217 unsigned ParamAreaSize, unsigned &ArgOffset,
4218 unsigned &AvailableFPRs,
4219 unsigned &AvailableVRs) {
4220 bool UseMemory = false;
4221
4222 // Respect alignment of argument on the stack.
4223 Align Alignment =
4224 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4225 ArgOffset = alignTo(ArgOffset, Alignment);
4226 // If there's no space left in the argument save area, we must
4227 // use memory (this check also catches zero-sized arguments).
4228 if (ArgOffset >= LinkageSize + ParamAreaSize)
4229 UseMemory = true;
4230
4231 // Allocate argument on the stack.
4232 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4233 if (Flags.isInConsecutiveRegsLast())
4234 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4235 // If we overran the argument save area, we must use memory
4236 // (this check catches arguments passed partially in memory)
4237 if (ArgOffset > LinkageSize + ParamAreaSize)
4238 UseMemory = true;
4239
4240 // However, if the argument is actually passed in an FPR or a VR,
4241 // we don't use memory after all.
4242 if (!Flags.isByVal()) {
4243 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4244 if (AvailableFPRs > 0) {
4245 --AvailableFPRs;
4246 return false;
4247 }
4248 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4249 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4250 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4251 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4252 if (AvailableVRs > 0) {
4253 --AvailableVRs;
4254 return false;
4255 }
4256 }
4257
4258 return UseMemory;
4259}
4260
4261/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4262 /// ensure minimum alignment required for target.
4263 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4264 unsigned NumBytes) {
4265 return alignTo(NumBytes, Lowering->getStackAlign());
4266}
4267
4268SDValue PPCTargetLowering::LowerFormalArguments(
4269 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4270 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4271 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4272 if (Subtarget.isAIXABI())
4273 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4274 InVals);
4275 if (Subtarget.is64BitELFABI())
4276 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4277 InVals);
4278 assert(Subtarget.is32BitELFABI());
4279 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4280 InVals);
4281}
4282
4283SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4284 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4285 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4286 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4287
4288 // 32-bit SVR4 ABI Stack Frame Layout:
4289 // +-----------------------------------+
4290 // +--> | Back chain |
4291 // | +-----------------------------------+
4292 // | | Floating-point register save area |
4293 // | +-----------------------------------+
4294 // | | General register save area |
4295 // | +-----------------------------------+
4296 // | | CR save word |
4297 // | +-----------------------------------+
4298 // | | VRSAVE save word |
4299 // | +-----------------------------------+
4300 // | | Alignment padding |
4301 // | +-----------------------------------+
4302 // | | Vector register save area |
4303 // | +-----------------------------------+
4304 // | | Local variable space |
4305 // | +-----------------------------------+
4306 // | | Parameter list area |
4307 // | +-----------------------------------+
4308 // | | LR save word |
4309 // | +-----------------------------------+
4310 // SP--> +--- | Back chain |
4311 // +-----------------------------------+
4312 //
4313 // Specifications:
4314 // System V Application Binary Interface PowerPC Processor Supplement
4315 // AltiVec Technology Programming Interface Manual
4316
4317 MachineFunction &MF = DAG.getMachineFunction();
4318 MachineFrameInfo &MFI = MF.getFrameInfo();
4319 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4320
4321 EVT PtrVT = getPointerTy(MF.getDataLayout());
4322 // Potential tail calls could cause overwriting of argument stack slots.
4323 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4324 (CallConv == CallingConv::Fast));
4325 const Align PtrAlign(4);
4326
4327 // Assign locations to all of the incoming arguments.
4328 SmallVector<CCValAssign, 16> ArgLocs;
4329 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4330 *DAG.getContext());
4331
4332 // Reserve space for the linkage area on the stack.
4333 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4334 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4335 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4336
4337 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4338 CCValAssign &VA = ArgLocs[i];
4339
4340 // Arguments stored in registers.
4341 if (VA.isRegLoc()) {
4342 const TargetRegisterClass *RC;
4343 EVT ValVT = VA.getValVT();
4344
4345 switch (ValVT.getSimpleVT().SimpleTy) {
4346 default:
4347 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4348 case MVT::i1:
4349 case MVT::i32:
4350 RC = &PPC::GPRCRegClass;
4351 break;
4352 case MVT::f32:
4353 if (Subtarget.hasP8Vector())
4354 RC = &PPC::VSSRCRegClass;
4355 else if (Subtarget.hasSPE())
4356 RC = &PPC::GPRCRegClass;
4357 else
4358 RC = &PPC::F4RCRegClass;
4359 break;
4360 case MVT::f64:
4361 if (Subtarget.hasVSX())
4362 RC = &PPC::VSFRCRegClass;
4363 else if (Subtarget.hasSPE())
4364 // SPE passes doubles in GPR pairs.
4365 RC = &PPC::GPRCRegClass;
4366 else
4367 RC = &PPC::F8RCRegClass;
4368 break;
4369 case MVT::v16i8:
4370 case MVT::v8i16:
4371 case MVT::v4i32:
4372 RC = &PPC::VRRCRegClass;
4373 break;
4374 case MVT::v4f32:
4375 RC = &PPC::VRRCRegClass;
4376 break;
4377 case MVT::v2f64:
4378 case MVT::v2i64:
4379 RC = &PPC::VRRCRegClass;
4380 break;
4381 }
4382
4383 SDValue ArgValue;
4384 // Transform the arguments stored in physical registers into
4385 // virtual ones.
4386 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4387 assert(i + 1 < e && "No second half of double precision argument");
4388 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4389 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4390 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4391 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4392 if (!Subtarget.isLittleEndian())
4393 std::swap (ArgValueLo, ArgValueHi);
4394 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4395 ArgValueHi);
4396 } else {
4397 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4398 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4399 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4400 if (ValVT == MVT::i1)
4401 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4402 }
4403
4404 InVals.push_back(ArgValue);
4405 } else {
4406 // Argument stored in memory.
4407 assert(VA.isMemLoc());
4408
4409 // Get the extended size of the argument type in stack
4410 unsigned ArgSize = VA.getLocVT().getStoreSize();
4411 // Get the actual size of the argument type
4412 unsigned ObjSize = VA.getValVT().getStoreSize();
4413 unsigned ArgOffset = VA.getLocMemOffset();
4414 // Stack objects in PPC32 are right justified.
4415 ArgOffset += ArgSize - ObjSize;
4416 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4417
4418 // Create load nodes to retrieve arguments from the stack.
4419 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4420 InVals.push_back(
4421 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4422 }
4423 }
4424
4425 // Assign locations to all of the incoming aggregate by value arguments.
4426 // Aggregates passed by value are stored in the local variable space of the
4427 // caller's stack frame, right above the parameter list area.
4428 SmallVector<CCValAssign, 16> ByValArgLocs;
4429 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4430 ByValArgLocs, *DAG.getContext());
4431
4432 // Reserve stack space for the allocations in CCInfo.
4433 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4434
4435 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4436
4437 // Area that is at least reserved in the caller of this function.
4438 unsigned MinReservedArea = CCByValInfo.getStackSize();
4439 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4440
4441 // Set the size that is at least reserved in caller of this function. Tail
4442 // call optimized function's reserved stack space needs to be aligned so that
4443 // taking the difference between two stack areas will result in an aligned
4444 // stack.
4445 MinReservedArea =
4446 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4447 FuncInfo->setMinReservedArea(MinReservedArea);
4448
4449 SmallVector<SDValue, 8> MemOps;
4450
4451 // If the function takes a variable number of arguments, make a frame index for
4452 // the start of the first vararg value... for expansion of llvm.va_start.
4453 if (isVarArg) {
4454 static const MCPhysReg GPArgRegs[] = {
4455 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4456 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4457 };
4458 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4459
4460 static const MCPhysReg FPArgRegs[] = {
4461 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4462 PPC::F8
4463 };
4464 unsigned NumFPArgRegs = std::size(FPArgRegs);
4465
4466 if (useSoftFloat() || hasSPE())
4467 NumFPArgRegs = 0;
4468
4469 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4470 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4471
4472 // Make room for NumGPArgRegs and NumFPArgRegs.
4473 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4474 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4475
4476 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4477 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4478
4479 FuncInfo->setVarArgsFrameIndex(
4480 MFI.CreateStackObject(Depth, Align(8), false));
4481 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4482
4483 // The fixed integer arguments of a variadic function are stored to the
4484 // VarArgsFrameIndex on the stack so that they may be loaded by
4485 // dereferencing the result of va_next.
4486 for (MCPhysReg GPArgReg : GPArgRegs) {
4487 // Get an existing live-in vreg, or add a new one.
4488 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4489 if (!VReg)
4490 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4491
4492 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4493 SDValue Store =
4494 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4495 MemOps.push_back(Store);
4496 // Increment the address by four for the next argument to store
4497 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4498 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4499 }
4500
4501 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4502 // is set.
4503 // The double arguments are stored to the VarArgsFrameIndex
4504 // on the stack.
4505 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4506 // Get an existing live-in vreg, or add a new one.
4507 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4508 if (!VReg)
4509 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4510
4511 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4512 SDValue Store =
4513 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4514 MemOps.push_back(Store);
4515 // Increment the address by eight for the next argument to store
4516 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4517 PtrVT);
4518 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4519 }
4520 }
4521
4522 if (!MemOps.empty())
4523 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4524
4525 return Chain;
4526}
4527
4528// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4529// value to MVT::i64 and then truncate to the correct register size.
4530SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4531 EVT ObjectVT, SelectionDAG &DAG,
4532 SDValue ArgVal,
4533 const SDLoc &dl) const {
4534 if (Flags.isSExt())
4535 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4536 DAG.getValueType(ObjectVT));
4537 else if (Flags.isZExt())
4538 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4539 DAG.getValueType(ObjectVT));
4540
4541 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4542}
4543
4544SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4545 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4546 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4547 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4548 // TODO: add description of PPC stack frame format, or at least some docs.
4549 //
4550 bool isELFv2ABI = Subtarget.isELFv2ABI();
4551 bool isLittleEndian = Subtarget.isLittleEndian();
4552 MachineFunction &MF = DAG.getMachineFunction();
4553 MachineFrameInfo &MFI = MF.getFrameInfo();
4554 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4555
4556 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4557 "fastcc not supported on varargs functions");
4558
4559 EVT PtrVT = getPointerTy(MF.getDataLayout());
4560 // Potential tail calls could cause overwriting of argument stack slots.
4561 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4562 (CallConv == CallingConv::Fast));
4563 unsigned PtrByteSize = 8;
4564 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4565
4566 static const MCPhysReg GPR[] = {
4567 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4568 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4569 };
4570 static const MCPhysReg VR[] = {
4571 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4572 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4573 };
4574
4575 const unsigned Num_GPR_Regs = std::size(GPR);
4576 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4577 const unsigned Num_VR_Regs = std::size(VR);
4578
4579 // Do a first pass over the arguments to determine whether the ABI
4580 // guarantees that our caller has allocated the parameter save area
4581 // on its stack frame. In the ELFv1 ABI, this is always the case;
4582 // in the ELFv2 ABI, it is true if this is a vararg function or if
4583 // any parameter is located in a stack slot.
4584
4585 bool HasParameterArea = !isELFv2ABI || isVarArg;
4586 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
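// i.e. 8 GPRs * 8 bytes == a 64-byte parameter save area.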
4587 unsigned NumBytes = LinkageSize;
4588 unsigned AvailableFPRs = Num_FPR_Regs;
4589 unsigned AvailableVRs = Num_VR_Regs;
4590 for (const ISD::InputArg &In : Ins) {
4591 if (In.Flags.isNest())
4592 continue;
4593
4594 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4595 LinkageSize, ParamAreaSize, NumBytes,
4596 AvailableFPRs, AvailableVRs))
4597 HasParameterArea = true;
4598 }
4599
4600 // Add DAG nodes to load the arguments or copy them out of registers. On
4601 // entry to a function on PPC, the arguments start after the linkage area,
4602 // although the first ones are often in registers.
4603
4604 unsigned ArgOffset = LinkageSize;
4605 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4606 SmallVector<SDValue, 8> MemOps;
4607 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4608 unsigned CurArgIdx = 0;
4609 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4610 SDValue ArgVal;
4611 bool needsLoad = false;
4612 EVT ObjectVT = Ins[ArgNo].VT;
4613 EVT OrigVT = Ins[ArgNo].ArgVT;
4614 unsigned ObjSize = ObjectVT.getStoreSize();
4615 unsigned ArgSize = ObjSize;
4616 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4617 if (Ins[ArgNo].isOrigArg()) {
4618 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4619 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4620 }
4621 // We re-align the argument offset for each argument, except when using the
4622 // fast calling convention, when we need to make sure we do that only when
4623 // we'll actually use a stack slot.
4624 unsigned CurArgOffset;
4625 Align Alignment;
4626 auto ComputeArgOffset = [&]() {
4627 /* Respect alignment of argument on the stack. */
4628 Alignment =
4629 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4630 ArgOffset = alignTo(ArgOffset, Alignment);
4631 CurArgOffset = ArgOffset;
4632 };
4633
4634 if (CallConv != CallingConv::Fast) {
4635 ComputeArgOffset();
4636
4637 /* Compute GPR index associated with argument offset. */
4638 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4639 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4640 }
4641
4642 // FIXME the codegen can be much improved in some cases.
4643 // We do not have to keep everything in memory.
4644 if (Flags.isByVal()) {
4645 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4646
4647 if (CallConv == CallingConv::Fast)
4648 ComputeArgOffset();
4649
4650 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4651 ObjSize = Flags.getByValSize();
4652 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4653 // Empty aggregate parameters do not take up registers. Examples:
4654 // struct { } a;
4655 // union { } b;
4656 // int c[0];
4657 // etc. However, we have to provide a place-holder in InVals, so
4658 // pretend we have an 8-byte item at the current address for that
4659 // purpose.
4660 if (!ObjSize) {
4661 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4662 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4663 InVals.push_back(FIN);
4664 continue;
4665 }
4666
4667 // Create a stack object covering all stack doublewords occupied
4668 // by the argument. If the argument is (fully or partially) on
4669 // the stack, or if the argument is fully in registers but the
4670 // caller has allocated the parameter save area anyway, we can refer
4671 // directly to the caller's stack frame. Otherwise, create a
4672 // local copy in our own frame.
4673 int FI;
4674 if (HasParameterArea ||
4675 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4676 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4677 else
4678 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4679 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4680
4681 // Handle aggregates smaller than 8 bytes.
4682 if (ObjSize < PtrByteSize) {
4683 // The value of the object is its address, which differs from the
4684 // address of the enclosing doubleword on big-endian systems.
4685 SDValue Arg = FIN;
4686 if (!isLittleEndian) {
4687 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4688 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4689 }
4690 InVals.push_back(Arg);
4691
4692 if (GPR_idx != Num_GPR_Regs) {
4693 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4694 FuncInfo->addLiveInAttr(VReg, Flags);
4695 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4696 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4697 SDValue Store =
4698 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4699 MachinePointerInfo(&*FuncArg), ObjType);
4700 MemOps.push_back(Store);
4701 }
4702 // Whether we copied from a register or not, advance the offset
4703 // into the parameter save area by a full doubleword.
4704 ArgOffset += PtrByteSize;
4705 continue;
4706 }
4707
4708 // The value of the object is its address, which is the address of
4709 // its first stack doubleword.
4710 InVals.push_back(FIN);
4711
4712 // Store whatever pieces of the object are in registers to memory.
4713 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4714 if (GPR_idx == Num_GPR_Regs)
4715 break;
4716
4717 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4718 FuncInfo->addLiveInAttr(VReg, Flags);
4719 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4720 SDValue Addr = FIN;
4721 if (j) {
4722 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4723 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4724 }
4725 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4726 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4727 SDValue Store =
4728 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4729 MachinePointerInfo(&*FuncArg, j), ObjType);
4730 MemOps.push_back(Store);
4731 ++GPR_idx;
4732 }
4733 ArgOffset += ArgSize;
4734 continue;
4735 }
4736
4737 switch (ObjectVT.getSimpleVT().SimpleTy) {
4738 default: llvm_unreachable("Unhandled argument type!");
4739 case MVT::i1:
4740 case MVT::i32:
4741 case MVT::i64:
4742 if (Flags.isNest()) {
4743 // The 'nest' parameter, if any, is passed in R11.
4744 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4745 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4746
4747 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4748 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4749
4750 break;
4751 }
4752
4753 // These can be scalar arguments or elements of an integer array type
4754 // passed directly. Clang may use those instead of "byval" aggregate
4755 // types to avoid forcing arguments to memory unnecessarily.
4756 if (GPR_idx != Num_GPR_Regs) {
4757 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4758 FuncInfo->addLiveInAttr(VReg, Flags);
4759 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4760
4761 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4762 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4763 // value to MVT::i64 and then truncate to the correct register size.
4764 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4765 } else {
4766 if (CallConv == CallingConv::Fast)
4767 ComputeArgOffset();
4768
4769 needsLoad = true;
4770 ArgSize = PtrByteSize;
4771 }
4772 if (CallConv != CallingConv::Fast || needsLoad)
4773 ArgOffset += 8;
4774 break;
4775
4776 case MVT::f32:
4777 case MVT::f64:
4778 // These can be scalar arguments or elements of a float array type
4779 // passed directly. The latter are used to implement ELFv2 homogenous
4780 // float aggregates.
4781 if (FPR_idx != Num_FPR_Regs) {
4782 unsigned VReg;
4783
4784 if (ObjectVT == MVT::f32)
4785 VReg = MF.addLiveIn(FPR[FPR_idx],
4786 Subtarget.hasP8Vector()
4787 ? &PPC::VSSRCRegClass
4788 : &PPC::F4RCRegClass);
4789 else
4790 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4791 ? &PPC::VSFRCRegClass
4792 : &PPC::F8RCRegClass);
4793
4794 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4795 ++FPR_idx;
4796 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4797 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4798 // once we support fp <-> gpr moves.
4799
4800 // This can only ever happen in the presence of f32 array types,
4801 // since otherwise we never run out of FPRs before running out
4802 // of GPRs.
4803 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4804 FuncInfo->addLiveInAttr(VReg, Flags);
4805 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4806
4807 if (ObjectVT == MVT::f32) {
4808 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4809 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4810 DAG.getConstant(32, dl, MVT::i32));
4811 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4812 }
4813
4814 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4815 } else {
4816 if (CallConv == CallingConv::Fast)
4817 ComputeArgOffset();
4818
4819 needsLoad = true;
4820 }
4821
4822 // When passing an array of floats, the array occupies consecutive
4823 // space in the argument area; only round up to the next doubleword
4824 // at the end of the array. Otherwise, each float takes 8 bytes.
4825 if (CallConv != CallingConv::Fast || needsLoad) {
4826 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4827 ArgOffset += ArgSize;
4828 if (Flags.isInConsecutiveRegsLast())
4829 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4830 }
4831 break;
4832 case MVT::v4f32:
4833 case MVT::v4i32:
4834 case MVT::v8i16:
4835 case MVT::v16i8:
4836 case MVT::v2f64:
4837 case MVT::v2i64:
4838 case MVT::v1i128:
4839 case MVT::f128:
4840 // These can be scalar arguments or elements of a vector array type
4841 // passed directly. The latter are used to implement ELFv2 homogenous
4842 // vector aggregates.
4843 if (VR_idx != Num_VR_Regs) {
4844 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4845 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4846 ++VR_idx;
4847 } else {
4848 if (CallConv == CallingConv::Fast)
4849 ComputeArgOffset();
4850 needsLoad = true;
4851 }
4852 if (CallConv != CallingConv::Fast || needsLoad)
4853 ArgOffset += 16;
4854 break;
4855 }
4856
4857 // We need to load the argument to a virtual register if we determined
4858 // above that we ran out of physical registers of the appropriate type.
4859 if (needsLoad) {
4860 if (ObjSize < ArgSize && !isLittleEndian)
4861 CurArgOffset += ArgSize - ObjSize;
4862 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4863 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4864 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4865 }
4866
4867 InVals.push_back(ArgVal);
4868 }
4869
4870 // Area that is at least reserved in the caller of this function.
4871 unsigned MinReservedArea;
4872 if (HasParameterArea)
4873 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4874 else
4875 MinReservedArea = LinkageSize;
4876
4877 // Set the size that is at least reserved in caller of this function. Tail
4878 // call optimized functions' reserved stack space needs to be aligned so that
4879 // taking the difference between two stack areas will result in an aligned
4880 // stack.
4881 MinReservedArea =
4882 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4883 FuncInfo->setMinReservedArea(MinReservedArea);
4884
4885 // If the function takes a variable number of arguments, make a frame index for
4886 // the start of the first vararg value... for expansion of llvm.va_start.
4887 // The ELFv2 ABI spec states:
4888 // C programs that are intended to be *portable* across different compilers
4889 // and architectures must use the header file <stdarg.h> to deal with variable
4890 // argument lists.
4891 if (isVarArg && MFI.hasVAStart()) {
4892 int Depth = ArgOffset;
4893
4894 FuncInfo->setVarArgsFrameIndex(
4895 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4896 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4897
4898 // If this function is vararg, store any remaining integer argument regs
4899 // to their spots on the stack so that they may be loaded by dereferencing
4900 // the result of va_next.
4901 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4902 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4903 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4904 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4905 SDValue Store =
4906 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4907 MemOps.push_back(Store);
4908 // Increment the address by four for the next argument to store
4909 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4910 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4911 }
4912 }
4913
4914 if (!MemOps.empty())
4915 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4916
4917 return Chain;
4918}
4919
4920/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4921/// adjusted to accommodate the arguments for the tailcall.
4922static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4923 unsigned ParamSize) {
4924
4925 if (!isTailCall) return 0;
4926
4927 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4928 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4929 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
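// For example, if the caller reserved 64 bytes of parameter area and the
// tail call needs 96, SPDiff is -32, i.e. roughly 32 extra bytes of frame
// are needed for the tail call; a non-negative SPDiff needs no extra space.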
4930 // Remember only if the new adjustment is bigger.
4931 if (SPDiff < FI->getTailCallSPDelta())
4932 FI->setTailCallSPDelta(SPDiff);
4933
4934 return SPDiff;
4935}
4936
4937static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4938
4939static bool callsShareTOCBase(const Function *Caller,
4940 const GlobalValue *CalleeGV,
4941 const TargetMachine &TM) {
4942 // It does not make sense to call callsShareTOCBase() with a caller that
4943 // is PC Relative since PC Relative callers do not have a TOC.
4944#ifndef NDEBUG
4945 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4946 assert(!STICaller->isUsingPCRelativeCalls() &&
4947 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4948#endif
4949
4950 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4951 // don't have enough information to determine if the caller and callee share
4952 // the same TOC base, so we have to pessimistically assume they don't for
4953 // correctness.
4954 if (!CalleeGV)
4955 return false;
4956
4957 // If the callee is preemptable, then the static linker will use a plt-stub
4958 // which saves the toc to the stack, and needs a nop after the call
4959 // instruction to convert to a toc-restore.
4960 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4961 return false;
4962
4963 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4964 // We may need a TOC restore in the situation where the caller requires a
4965 // valid TOC but the callee is PC Relative and does not.
4966 const Function *F = dyn_cast<Function>(CalleeGV);
4967 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4968
4969 // If we have an Alias we can try to get the function from there.
4970 if (Alias) {
4971 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4972 F = dyn_cast<Function>(GlobalObj);
4973 }
4974
4975 // If we still have no valid function pointer we do not have enough
4976 // information to determine if the callee uses PC Relative calls so we must
4977 // assume that it does.
4978 if (!F)
4979 return false;
4980
4981 // If the callee uses PC Relative we cannot guarantee that the callee won't
4982 // clobber the TOC of the caller and so we must assume that the two
4983 // functions do not share a TOC base.
4984 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4985 if (STICallee->isUsingPCRelativeCalls())
4986 return false;
4987
4988 // If the GV is not a strong definition then we need to assume it can be
4989 // replaced by another function at link time. The function that replaces
4990 // it may not share the same TOC as the caller since the callee may be
4991 // replaced by a PC Relative version of the same function.
4992 if (!CalleeGV->isStrongDefinitionForLinker())
4993 return false;
4994
4995 // The medium and large code models are expected to provide a sufficiently
4996 // large TOC to satisfy all data addressing needs of a module with a
4997 // single TOC.
4998 if (CodeModel::Medium == TM.getCodeModel() ||
4999 CodeModel::Large == TM.getCodeModel())
5000 return true;
5001
5002 // Any explicitly-specified sections and section prefixes must also match.
5003 // Also, if we're using -ffunction-sections, then each function is always in
5004 // a different section (the same is true for COMDAT functions).
5005 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
5006 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
5007 return false;
5008 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
5009 if (F->getSectionPrefix() != Caller->getSectionPrefix())
5010 return false;
5011 }
5012
5013 return true;
5014}
5015
5016 static bool
5017 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
5018 const SmallVectorImpl<ISD::OutputArg> &Outs) {
5019 assert(Subtarget.is64BitELFABI());
5020
5021 const unsigned PtrByteSize = 8;
5022 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5023
5024 static const MCPhysReg GPR[] = {
5025 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5026 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5027 };
5028 static const MCPhysReg VR[] = {
5029 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5030 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5031 };
5032
5033 const unsigned NumGPRs = std::size(GPR);
5034 const unsigned NumFPRs = 13;
5035 const unsigned NumVRs = std::size(VR);
5036 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5037
5038 unsigned NumBytes = LinkageSize;
5039 unsigned AvailableFPRs = NumFPRs;
5040 unsigned AvailableVRs = NumVRs;
5041
5042 for (const ISD::OutputArg& Param : Outs) {
5043 if (Param.Flags.isNest()) continue;
5044
5045 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5046 LinkageSize, ParamAreaSize, NumBytes,
5047 AvailableFPRs, AvailableVRs))
5048 return true;
5049 }
5050 return false;
5051}
5052
5053static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5054 if (CB.arg_size() != CallerFn->arg_size())
5055 return false;
5056
5057 auto CalleeArgIter = CB.arg_begin();
5058 auto CalleeArgEnd = CB.arg_end();
5059 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5060
5061 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5062 const Value* CalleeArg = *CalleeArgIter;
5063 const Value* CallerArg = &(*CallerArgIter);
5064 if (CalleeArg == CallerArg)
5065 continue;
5066
5067 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5068 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5069 // }
5070 // 1st argument of callee is undef and has the same type as caller.
5071 if (CalleeArg->getType() == CallerArg->getType() &&
5072 isa<UndefValue>(CalleeArg))
5073 continue;
5074
5075 return false;
5076 }
5077
5078 return true;
5079}
5080
5081// Returns true if TCO is possible between the callers and callees
5082// calling conventions.
5083 static bool
5084 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5085 CallingConv::ID CalleeCC) {
5086 // Tail calls are possible with fastcc and ccc.
5087 auto isTailCallableCC = [] (CallingConv::ID CC){
5088 return CC == CallingConv::C || CC == CallingConv::Fast;
5089 };
5090 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5091 return false;
5092
5093 // We can safely tail call both fastcc and ccc callees from a c calling
5094 // convention caller. If the caller is fastcc, we may have less stack space
5095 // than a non-fastcc caller with the same signature so disable tail-calls in
5096 // that case.
5097 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5098}
5099
5100bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5101 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5102 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5103 const SmallVectorImpl<ISD::OutputArg> &Outs,
5104 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5105 bool isCalleeExternalSymbol) const {
5106 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5107
5108 if (DisableSCO && !TailCallOpt) return false;
5109
5110 // Variadic argument functions are not supported.
5111 if (isVarArg) return false;
5112
5113 // Check that the calling conventions are compatible for tco.
5114 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5115 return false;
5116
5117 // A caller that contains any byval parameter is not supported.
5118 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5119 return false;
5120
5121 // A callee that contains any byval parameter is not supported either.
5122 // Note: This is a quick workaround, because in some cases, e.g.
5123 // caller's stack size > callee's stack size, we are still able to apply
5124 // sibling call optimization. For example, gcc is able to do SCO for caller1
5125 // in the following example, but not for caller2.
5126 // struct test {
5127 // long int a;
5128 // char ary[56];
5129 // } gTest;
5130 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5131 // b->a = v.a;
5132 // return 0;
5133 // }
5134 // void caller1(struct test a, struct test c, struct test *b) {
5135 // callee(gTest, b); }
5136 // void caller2(struct test *b) { callee(gTest, b); }
5137 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5138 return false;
5139
5140 // If callee and caller use different calling conventions, we cannot pass
5141 // parameters on stack since offsets for the parameter area may be different.
5142 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5143 return false;
5144
5145 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5146 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5147 // callee potentially have different TOC bases then we cannot tail call since
5148 // we need to restore the TOC pointer after the call.
5149 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5150 // We cannot guarantee this for indirect calls or calls to external functions.
5151 // When PC-Relative addressing is used, the concept of the TOC is no longer
5152 // applicable so this check is not required.
5153 // Check first for indirect calls.
5154 if (!Subtarget.isUsingPCRelativeCalls() &&
5155 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5156 return false;
5157
5158 // Check if we share the TOC base.
5159 if (!Subtarget.isUsingPCRelativeCalls() &&
5160 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5161 return false;
5162
5163 // TCO allows altering callee ABI, so we don't have to check further.
5164 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5165 return true;
5166
5167 if (DisableSCO) return false;
5168
5169 // If the callee uses the same argument list as the caller, then we can
5170 // apply SCO in this case. If it does not, then we need to check whether the
5171 // callee needs stack slots for passing arguments.
5172 // PC Relative tail calls may not have a CallBase.
5173 // If there is no CallBase we cannot verify if we have the same argument
5174 // list so assume that we don't have the same argument list.
5175 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5176 needStackSlotPassParameters(Subtarget, Outs))
5177 return false;
5178 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5179 return false;
5180
5181 return true;
5182}
5183
5184/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5185/// for tail call optimization. Targets which want to do tail call
5186/// optimization should implement this function.
5187bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5188 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5189 CallingConv::ID CallerCC, bool isVarArg,
5190 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5191 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5192 return false;
5193
5194 // Variable argument functions are not supported.
5195 if (isVarArg)
5196 return false;
5197
5198 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5199 // Functions containing byval parameters are not supported.
5200 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5201 return false;
5202
5203 // Non-PIC/GOT tail calls are supported.
5204 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5205 return true;
5206
5207 // At the moment we can only do local tail calls (in the same module,
5208 // hidden or protected) if we are generating PIC.
5209 if (CalleeGV)
5210 return CalleeGV->hasHiddenVisibility() ||
5211 CalleeGV->hasProtectedVisibility();
5212 }
5213
5214 return false;
5215}
5216
5217 /// isBLACompatibleAddress - Return the immediate to use if the specified
5218/// 32-bit value is representable in the immediate field of a BxA instruction.
5220 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5221 if (!C) return nullptr;
5222
5223 int Addr = C->getZExtValue();
5224 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5225 SignExtend32<26>(Addr) != Addr)
5226 return nullptr; // Top 6 bits have to be sext of immediate.
5227
5228 return DAG
5230 (int)C->getZExtValue() >> 2, SDLoc(Op),
5232 .getNode();
5233}
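// Worked example (illustrative; derived from the checks above, not quoted from
// the ISA manual): the BxA target is a signed 26-bit absolute address whose low
// 2 bits are implied zero.
//   Addr = 0x01FFFFFC: low bits clear and SignExtend32<26>(Addr) == Addr, so the
//                      returned immediate is 0x01FFFFFC >> 2 = 0x007FFFFF.
//   Addr = 0x02000000: bit 25 is set, the sign-extension check fails, and
//                      nullptr is returned (the target is not encodable).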
5234
5235namespace {
5236
5237struct TailCallArgumentInfo {
5238 SDValue Arg;
5239 SDValue FrameIdxOp;
5240 int FrameIdx = 0;
5241
5242 TailCallArgumentInfo() = default;
5243};
5244
5245} // end anonymous namespace
5246
5247/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5249 SelectionDAG &DAG, SDValue Chain,
5250 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5251 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5252 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5253 SDValue Arg = TailCallArgs[i].Arg;
5254 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5255 int FI = TailCallArgs[i].FrameIdx;
5256 // Store relative to the frame pointer.
5257 MemOpChains.push_back(DAG.getStore(
5258 Chain, dl, Arg, FIN,
5260 }
5261}
5262
5263/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5264/// the appropriate stack slot for the tail call optimized function call.
5266 SDValue OldRetAddr, SDValue OldFP,
5267 int SPDiff, const SDLoc &dl) {
5268 if (SPDiff) {
5269 // Calculate the new stack slot for the return address.
5271 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5272 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5273 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5274 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5275 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5276 NewRetAddrLoc, true);
5277 SDValue NewRetAddrFrIdx =
5278 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5279 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5280 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5281 }
5282 return Chain;
5283}
5284
5285 /// CalculateTailCallArgDest - Remember the argument for later processing and
5286 /// calculate the position of the argument.
5288 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5289 int SPDiff, unsigned ArgOffset,
5290 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5291 int Offset = ArgOffset + SPDiff;
5292 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5293 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5294 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5295 SDValue FIN = DAG.getFrameIndex(FI, VT);
5296 TailCallArgumentInfo Info;
5297 Info.Arg = Arg;
5298 Info.FrameIdxOp = FIN;
5299 Info.FrameIdx = FI;
5300 TailCallArguments.push_back(Info);
5301}
5302
5303 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5304 /// address stack slots. Returns the chain as result and the loaded return
5305 /// address and frame pointer in LROpOut/FPOpOut. Used when tail calling.
5306SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5307 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5308 SDValue &FPOpOut, const SDLoc &dl) const {
5309 if (SPDiff) {
5310 // Load the LR and FP stack slot for later adjusting.
5311 LROpOut = getReturnAddrFrameIndex(DAG);
5312 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5314 Chain = SDValue(LROpOut.getNode(), 1);
5315 }
5316 return Chain;
5317}
5318
5319/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5320/// by "Src" to address "Dst" of size "Size". Alignment information is
5321/// specified by the specific parameter attribute. The copy will be passed as
5322/// a byval function parameter.
5323/// Sometimes what we are copying is the end of a larger object, the part that
5324/// does not fit in registers.
5326 SDValue Chain, ISD::ArgFlagsTy Flags,
5327 SelectionDAG &DAG, const SDLoc &dl) {
5328 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5329 return DAG.getMemcpy(
5330 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5331 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5332}
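// Illustrative use (a sketch, not a normative ABI statement): with the 64-bit
// ELF ABIs, a byval aggregate such as
//   struct big { long x[9]; };   // 72 bytes
// has its first 64 bytes passed in the eight GPR argument registers, and this
// helper is what memcpys the remainder (or, conservatively, the whole object)
// into the parameter save area so the callee sees a contiguous copy.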
5333
5334/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5335/// tail calls.
5337 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5338 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5339 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5340 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5342 if (!isTailCall) {
5343 if (isVector) {
5344 SDValue StackPtr;
5345 if (isPPC64)
5346 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5347 else
5348 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5349 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5350 DAG.getConstant(ArgOffset, dl, PtrVT));
5351 }
5352 MemOpChains.push_back(
5353 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5354 // Calculate and remember argument location.
5355 } else
5356 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5357 TailCallArguments);
5358}
5359
5360static void
5362 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5363 SDValue FPOp,
5364 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5365 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5366 // might overwrite each other in case of tail call optimization.
5367 SmallVector<SDValue, 8> MemOpChains2;
5368 // Do not flag preceding copytoreg stuff together with the following stuff.
5369 InGlue = SDValue();
5370 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5371 MemOpChains2, dl);
5372 if (!MemOpChains2.empty())
5373 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5374
5375 // Store the return address to the appropriate stack slot.
5376 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5377
5378 // Emit callseq_end just before tailcall node.
5379 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5380 InGlue = Chain.getValue(1);
5381}
5382
5383// Is this global address that of a function that can be called by name? (as
5384// opposed to something that must hold a descriptor for an indirect call).
5385static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5386 if (GV) {
5387 if (GV->isThreadLocal())
5388 return false;
5389
5390 return GV->getValueType()->isFunctionTy();
5391 }
5392
5393 return false;
5394}
5395
5396SDValue PPCTargetLowering::LowerCallResult(
5397 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5398 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5399 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5401 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5402 *DAG.getContext());
5403
5404 CCRetInfo.AnalyzeCallResult(
5405 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5407 : RetCC_PPC);
5408
5409 // Copy all of the result registers out of their specified physreg.
5410 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5411 CCValAssign &VA = RVLocs[i];
5412 assert(VA.isRegLoc() && "Can only return in registers!");
5413
5414 SDValue Val;
5415
5416 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5417 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5418 InGlue);
5419 Chain = Lo.getValue(1);
5420 InGlue = Lo.getValue(2);
5421 VA = RVLocs[++i]; // skip ahead to next loc
5422 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5423 InGlue);
5424 Chain = Hi.getValue(1);
5425 InGlue = Hi.getValue(2);
5426 if (!Subtarget.isLittleEndian())
5427 std::swap (Lo, Hi);
5428 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5429 } else {
5430 Val = DAG.getCopyFromReg(Chain, dl,
5431 VA.getLocReg(), VA.getLocVT(), InGlue);
5432 Chain = Val.getValue(1);
5433 InGlue = Val.getValue(2);
5434 }
5435
5436 switch (VA.getLocInfo()) {
5437 default: llvm_unreachable("Unknown loc info!");
5438 case CCValAssign::Full: break;
5439 case CCValAssign::AExt:
5440 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5441 break;
5442 case CCValAssign::ZExt:
5443 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5444 DAG.getValueType(VA.getValVT()));
5445 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5446 break;
5447 case CCValAssign::SExt:
5448 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5449 DAG.getValueType(VA.getValVT()));
5450 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5451 break;
5452 }
5453
5454 InVals.push_back(Val);
5455 }
5456
5457 return Chain;
5458}
5459
5460static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5461 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5462 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5463 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5464
5465 // PatchPoint calls are not indirect.
5466 if (isPatchPoint)
5467 return false;
5468
5469 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5470 return false;
5471
5472 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
5473 // because the immediate function pointer points to a descriptor instead of
5474 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5475 // pointer immediate points to the global entry point, while the BLA would
5476 // need to jump to the local entry point (see rL211174).
5477 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5478 isBLACompatibleAddress(Callee, DAG))
5479 return false;
5480
5481 return true;
5482}
5483
5484// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5485static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5486 return Subtarget.isAIXABI() ||
5487 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5488}
5489
5491 const Function &Caller, const SDValue &Callee,
5492 const PPCSubtarget &Subtarget,
5493 const TargetMachine &TM,
5494 bool IsStrictFPCall = false) {
5495 if (CFlags.IsTailCall)
5496 return PPCISD::TC_RETURN;
5497
5498 unsigned RetOpc = 0;
5499 // This is a call through a function pointer.
5500 if (CFlags.IsIndirect) {
5501 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5502 // indirect calls. The save of the caller's TOC pointer to the stack will be
5503 // inserted into the DAG as part of call lowering. The restore of the TOC
5504 // pointer is modeled by using a pseudo instruction for the call opcode that
5505 // represents the 2 instruction sequence of an indirect branch and link,
5506 // immediately followed by a load of the TOC pointer from the stack save
5507 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5508 // as it is not saved or used.
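// For example (assembly sketch; the exact TOC save offset is ABI specific and
// comes from PPCFrameLowering::getTOCSaveOffset()): the TOC-restoring pseudo
// eventually expands to something along the lines of
//   bctrl                   # indirect branch and link via CTR
//   ld 2, <toc-save>(1)     # reload the caller's TOC pointer from its slot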
5510 : PPCISD::BCTRL;
5511 } else if (Subtarget.isUsingPCRelativeCalls()) {
5512 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5513 RetOpc = PPCISD::CALL_NOTOC;
5514 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5515 // The ABIs that maintain a TOC pointer across calls need to have a nop
5516 // immediately following the call instruction if the caller and callee may
5517 // have different TOC bases. At link time if the linker determines the calls
5518 // may not share a TOC base, the call is redirected to a trampoline inserted
5519 // by the linker. The trampoline will (among other things) save the caller's
5520 // TOC pointer at an ABI designated offset in the linkage area and the
5521 // linker will rewrite the nop to be a load of the TOC pointer from the
5522 // linkage area into gpr2.
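// Illustrative call-site shape (a sketch; the save offset shown is symbolic):
//   bl callee
//   nop
// If the linker decides the caller and callee may not share a TOC base, the
// nop is rewritten into a TOC restore such as
//   ld 2, <toc-save>(1)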
5523 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5524 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5525 RetOpc =
5526 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5527 } else
5528 RetOpc = PPCISD::CALL;
5529 if (IsStrictFPCall) {
5530 switch (RetOpc) {
5531 default:
5532 llvm_unreachable("Unknown call opcode");
5535 break;
5536 case PPCISD::BCTRL:
5537 RetOpc = PPCISD::BCTRL_RM;
5538 break;
5539 case PPCISD::CALL_NOTOC:
5540 RetOpc = PPCISD::CALL_NOTOC_RM;
5541 break;
5542 case PPCISD::CALL:
5543 RetOpc = PPCISD::CALL_RM;
5544 break;
5545 case PPCISD::CALL_NOP:
5546 RetOpc = PPCISD::CALL_NOP_RM;
5547 break;
5548 }
5549 }
5550 return RetOpc;
5551}
5552
5553static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5554 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5555 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5556 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5557 return SDValue(Dest, 0);
5558
5559 // Returns true if the callee is local, and false otherwise.
5560 auto isLocalCallee = [&]() {
5561 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5562 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5563
5564 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5565 !isa_and_nonnull<GlobalIFunc>(GV);
5566 };
5567
5568 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5569 // a static relocation model causes some versions of GNU LD (2.17.50, at
5570 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5571 // built with secure-PLT.
5572 bool UsePlt =
5573 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5575
5576 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5577 const TargetMachine &TM = Subtarget.getTargetMachine();
5578 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5579 auto *S =
5580 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5581
5583 return DAG.getMCSymbol(S, PtrVT);
5584 };
5585
5586 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5587 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5588 if (isFunctionGlobalAddress(GV)) {
5589 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5590
5591 if (Subtarget.isAIXABI()) {
5592 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5593 return getAIXFuncEntryPointSymbolSDNode(GV);
5594 }
5595 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5596 UsePlt ? PPCII::MO_PLT : 0);
5597 }
5598
5599 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5600 const char *SymName = S->getSymbol();
5601 if (Subtarget.isAIXABI()) {
5602 // If there exists a user-declared function whose name is the same as the
5603 // ExternalSymbol's, then we pick up the user-declared version.
5605 if (const Function *F =
5606 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5607 return getAIXFuncEntryPointSymbolSDNode(F);
5608
5609 // On AIX, direct function calls reference the symbol for the function's
5610 // entry point, which is named by prepending a "." before the function's
5611 // C-linkage name. A Qualname is returned here because an external
5612 // function entry point is a csect with XTY_ER property.
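// For instance (illustrative): an external call to "memcpy" on AIX is emitted
// against the entry-point csect ".memcpy", while the undotted name "memcpy"
// labels the function descriptor.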
5613 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5614 auto &Context = DAG.getMachineFunction().getContext();
5615 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5616 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5618 return Sec->getQualNameSymbol();
5619 };
5620
5621 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5622 }
5623 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5624 UsePlt ? PPCII::MO_PLT : 0);
5625 }
5626
5627 // No transformation needed.
5628 assert(Callee.getNode() && "What no callee?");
5629 return Callee;
5630}
5631
5633 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5634 "Expected a CALLSEQ_STARTSDNode.");
5635
5636 // The last operand is the chain, except when the node has glue. If the node
5637 // has glue, then the last operand is the glue, and the chain is the second
5638 // last operand.
5639 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5640 if (LastValue.getValueType() != MVT::Glue)
5641 return LastValue;
5642
5643 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5644}
5645
5646 // Creates the node that moves a function's address into the count register
5647// to prepare for an indirect call instruction.
5648static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5649 SDValue &Glue, SDValue &Chain,
5650 const SDLoc &dl) {
5651 SDValue MTCTROps[] = {Chain, Callee, Glue};
5652 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5653 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5654 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5655 // The glue is the second value produced.
5656 Glue = Chain.getValue(1);
5657}
5658
5660 SDValue &Glue, SDValue &Chain,
5661 SDValue CallSeqStart,
5662 const CallBase *CB, const SDLoc &dl,
5663 bool hasNest,
5664 const PPCSubtarget &Subtarget) {
5665 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5666 // entry point, but to the function descriptor (the function entry point
5667 // address is part of the function descriptor though).
5668 // The function descriptor is a three-doubleword structure with the
5669 // following fields: function entry point, TOC base address and
5670 // environment pointer.
5671 // Thus for a call through a function pointer, the following actions need
5672 // to be performed:
5673 // 1. Save the TOC of the caller in the TOC save area of its stack
5674 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5675 // 2. Load the address of the function entry point from the function
5676 // descriptor.
5677 // 3. Load the TOC of the callee from the function descriptor into r2.
5678 // 4. Load the environment pointer from the function descriptor into
5679 // r11.
5680 // 5. Branch to the function entry point address.
5681 // 6. On return of the callee, the TOC of the caller needs to be
5682 // restored (this is done in FinishCall()).
5683 //
5684 // The loads are scheduled at the beginning of the call sequence, and the
5685 // register copies are flagged together to ensure that no other
5686 // operations can be scheduled in between. E.g. without flagging the
5687 // copies together, a TOC access in the caller could be scheduled between
5688 // the assignment of the callee TOC and the branch to the callee, which leads
5689 // to incorrect code.
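// Layout sketch of the descriptor dereferenced below (offsets are those of the
// 64-bit ELFv1 ABI; the code uses descriptorTOCAnchorOffset() and
// descriptorEnvironmentPointerOffset() rather than hard-coded values):
//   struct FunctionDescriptor {
//     void *EntryPoint;     // offset 0, moved into CTR
//     void *TOCBase;        // offset 8, copied into r2
//     void *EnvironmentPtr; // offset 16, copied into r11 unless 'nest' is used
//   };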
5690
5691 // Start by loading the function address from the descriptor.
5692 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5693 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5697
5698 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5699
5700 // Registers used in building the DAG.
5701 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5702 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5703
5704 // Offsets of descriptor members.
5705 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5706 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5707
5708 const MVT RegVT = Subtarget.getScalarIntVT();
5709 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5710
5711 // One load for the function's entry point address.
5712 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5713 Alignment, MMOFlags);
5714
5715 // One for loading the TOC anchor for the module that contains the called
5716 // function.
5717 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5718 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5719 SDValue TOCPtr =
5720 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5721 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5722
5723 // One for loading the environment pointer.
5724 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5725 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5726 SDValue LoadEnvPtr =
5727 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5728 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5729
5730
5731 // Then copy the newly loaded TOC anchor to the TOC pointer.
5732 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5733 Chain = TOCVal.getValue(0);
5734 Glue = TOCVal.getValue(1);
5735
5736 // If the function call has an explicit 'nest' parameter, it takes the
5737 // place of the environment pointer.
5738 assert((!hasNest || !Subtarget.isAIXABI()) &&
5739 "Nest parameter is not supported on AIX.");
5740 if (!hasNest) {
5741 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5742 Chain = EnvVal.getValue(0);
5743 Glue = EnvVal.getValue(1);
5744 }
5745
5746 // The rest of the indirect call sequence is the same as the non-descriptor
5747 // DAG.
5748 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5749}
5750
5751static void
5753 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5754 SelectionDAG &DAG,
5755 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5756 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5757 const PPCSubtarget &Subtarget) {
5758 const bool IsPPC64 = Subtarget.isPPC64();
5759 // MVT for a general purpose register.
5760 const MVT RegVT = Subtarget.getScalarIntVT();
5761
5762 // First operand is always the chain.
5763 Ops.push_back(Chain);
5764
5765 // If it's a direct call, pass the callee as the second operand.
5766 if (!CFlags.IsIndirect)
5767 Ops.push_back(Callee);
5768 else {
5769 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5770
5771 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5772 // on the stack (this would have been done in `LowerCall_64SVR4` or
5773 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5774 // represents both the indirect branch and a load that restores the TOC
5775 // pointer from the linkage area. The operand for the TOC restore is an add
5776 // of the TOC save offset to the stack pointer. This must be the second
5777 // operand: after the chain input but before any other variadic arguments.
5778 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5779 // saved or used.
5780 if (isTOCSaveRestoreRequired(Subtarget)) {
5781 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5782
5783 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5784 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5785 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5786 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5787 Ops.push_back(AddTOC);
5788 }
5789
5790 // Add the register used for the environment pointer.
5791 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5793 RegVT));
5794
5795
5796 // Add CTR register as callee so a bctr can be emitted later.
5797 if (CFlags.IsTailCall)
5798 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5799 }
5800
5801 // If this is a tail call add stack pointer delta.
5802 if (CFlags.IsTailCall)
5803 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5804
5805 // Add argument registers to the end of the list so that they are known live
5806 // into the call.
5807 for (const auto &[Reg, N] : RegsToPass)
5808 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5809
5810 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5811 // no way to mark dependencies as implicit here.
5812 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5813 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5814 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5815 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5816
5817 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5818 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5819 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5820
5821 // Add a register mask operand representing the call-preserved registers.
5822 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5823 const uint32_t *Mask =
5824 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5825 assert(Mask && "Missing call preserved mask for calling convention");
5826 Ops.push_back(DAG.getRegisterMask(Mask));
5827
5828 // If the glue is valid, it is the last operand.
5829 if (Glue.getNode())
5830 Ops.push_back(Glue);
5831}
5832
5833SDValue PPCTargetLowering::FinishCall(
5834 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5835 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5836 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5837 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5838 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5839
5840 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5841 Subtarget.isAIXABI())
5842 setUsesTOCBasePtr(DAG);
5843
5844 unsigned CallOpc =
5845 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5846 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5847
5848 if (!CFlags.IsIndirect)
5849 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5850 else if (Subtarget.usesFunctionDescriptors())
5851 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5852 dl, CFlags.HasNest, Subtarget);
5853 else
5854 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5855
5856 // Build the operand list for the call instruction.
5858 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5859 SPDiff, Subtarget);
5860
5861 // Emit tail call.
5862 if (CFlags.IsTailCall) {
5863 // Indirect tail calls when using PC-Relative calls do not have the same
5864 // constraints.
5865 assert(((Callee.getOpcode() == ISD::Register &&
5866 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5867 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5868 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5869 isa<ConstantSDNode>(Callee) ||
5870 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5871 "Expecting a global address, external symbol, absolute value, "
5872 "register or an indirect tail call when PC Relative calls are "
5873 "used.");
5874 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5875 assert(CallOpc == PPCISD::TC_RETURN &&
5876 "Unexpected call opcode for a tail call.");
5878 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5879 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5880 return Ret;
5881 }
5882
5883 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5884 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5885 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5886 Glue = Chain.getValue(1);
5887
5888 // When performing tail call optimization the callee pops its arguments off
5889 // the stack. Account for this here so these bytes can be pushed back on in
5890 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5891 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5893 ? NumBytes
5894 : 0;
5895
5896 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5897 Glue = Chain.getValue(1);
5898
5899 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5900 DAG, InVals);
5901}
5902
5904 CallingConv::ID CalleeCC = CB->getCallingConv();
5905 const Function *CallerFunc = CB->getCaller();
5906 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5907 const Function *CalleeFunc = CB->getCalledFunction();
5908 if (!CalleeFunc)
5909 return false;
5910 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5911
5914
5915 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5916 CalleeFunc->getAttributes(), Outs, *this,
5917 CalleeFunc->getDataLayout());
5918
5919 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5920 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5921 false /*isCalleeExternalSymbol*/);
5922}
5923
5924bool PPCTargetLowering::isEligibleForTCO(
5925 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5926 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5928 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5929 bool isCalleeExternalSymbol) const {
5930 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5931 return false;
5932
5933 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5934 return IsEligibleForTailCallOptimization_64SVR4(
5935 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5936 isCalleeExternalSymbol);
5937 else
5938 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5939 isVarArg, Ins);
5940}
5941
5942SDValue
5943PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5944 SmallVectorImpl<SDValue> &InVals) const {
5945 SelectionDAG &DAG = CLI.DAG;
5946 SDLoc &dl = CLI.DL;
5948 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5950 SDValue Chain = CLI.Chain;
5951 SDValue Callee = CLI.Callee;
5952 bool &isTailCall = CLI.IsTailCall;
5953 CallingConv::ID CallConv = CLI.CallConv;
5954 bool isVarArg = CLI.IsVarArg;
5955 bool isPatchPoint = CLI.IsPatchPoint;
5956 const CallBase *CB = CLI.CB;
5957
5958 if (isTailCall) {
5960 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5961 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5962 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5963 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5964
5965 isTailCall =
5966 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5967 &(MF.getFunction()), IsCalleeExternalSymbol);
5968 if (isTailCall) {
5969 ++NumTailCalls;
5970 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5971 ++NumSiblingCalls;
5972
5973 // PC Relative calls no longer guarantee that the callee is a Global
5974 // Address Node. The call could be an indirect tail call, in which
5975 // case the SDValue for the callee could be a load (to load the address
5976 // of a function pointer) or it may be a register copy (to move the
5977 // address of the callee from a function parameter into a virtual
5978 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5979 assert((Subtarget.isUsingPCRelativeCalls() ||
5980 isa<GlobalAddressSDNode>(Callee)) &&
5981 "Callee should be an llvm::Function object.");
5982
5983 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5984 << "\nTCO callee: ");
5985 LLVM_DEBUG(Callee.dump());
5986 }
5987 }
5988
5989 if (!isTailCall && CB && CB->isMustTailCall())
5990 report_fatal_error("failed to perform tail call elimination on a call "
5991 "site marked musttail");
5992
5993 // When long calls (i.e. indirect calls) are always used, calls are always
5994 // made via a function pointer. If we have a function name, first translate it
5995 // into a pointer.
5996 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5997 !isTailCall)
5998 Callee = LowerGlobalAddress(Callee, DAG);
5999
6000 CallFlags CFlags(
6001 CallConv, isTailCall, isVarArg, isPatchPoint,
6002 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
6003 // hasNest
6004 Subtarget.is64BitELFABI() &&
6005 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
6006 CLI.NoMerge);
6007
6008 if (Subtarget.isAIXABI())
6009 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6010 InVals, CB);
6011
6012 assert(Subtarget.isSVR4ABI());
6013 if (Subtarget.isPPC64())
6014 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6015 InVals, CB);
6016 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6017 InVals, CB);
6018}
6019
6020SDValue PPCTargetLowering::LowerCall_32SVR4(
6021 SDValue Chain, SDValue Callee, CallFlags CFlags,
6023 const SmallVectorImpl<SDValue> &OutVals,
6024 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6026 const CallBase *CB) const {
6027 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
6028 // of the 32-bit SVR4 ABI stack frame layout.
6029
6030 const CallingConv::ID CallConv = CFlags.CallConv;
6031 const bool IsVarArg = CFlags.IsVarArg;
6032 const bool IsTailCall = CFlags.IsTailCall;
6033
6034 assert((CallConv == CallingConv::C ||
6035 CallConv == CallingConv::Cold ||
6036 CallConv == CallingConv::Fast) && "Unknown calling convention!");
6037
6038 const Align PtrAlign(4);
6039
6041
6042 // Mark this function as potentially containing a function that contains a
6043 // tail call. As a consequence the frame pointer will be used for dynamic
6044 // stack allocation and for restoring the caller's stack pointer in this
6045 // function's epilogue. This is done because the tail-called function might
6046 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6047 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6048 CallConv == CallingConv::Fast)
6049 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6050
6051 // Count how many bytes are to be pushed on the stack, including the linkage
6052 // area, parameter list area and the part of the local variable space which
6053 // contains copies of aggregates which are passed by value.
6054
6055 // Assign locations to all of the outgoing arguments.
6057 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6058
6059 // Reserve space for the linkage area on the stack.
6060 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6061 PtrAlign);
6062
6063 if (IsVarArg) {
6064 // Handle fixed and variable vector arguments differently.
6065 // Fixed vector arguments go into registers as long as registers are
6066 // available. Variable vector arguments always go into memory.
6067 unsigned NumArgs = Outs.size();
6068
6069 for (unsigned i = 0; i != NumArgs; ++i) {
6070 MVT ArgVT = Outs[i].VT;
6071 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6072 bool Result;
6073
6074 if (!ArgFlags.isVarArg()) {
6075 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6076 Outs[i].OrigTy, CCInfo);
6077 } else {
6079 ArgFlags, Outs[i].OrigTy, CCInfo);
6080 }
6081
6082 if (Result) {
6083#ifndef NDEBUG
6084 errs() << "Call operand #" << i << " has unhandled type "
6085 << ArgVT << "\n";
6086#endif
6087 llvm_unreachable(nullptr);
6088 }
6089 }
6090 } else {
6091 // All arguments are treated the same.
6092 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6093 }
6094
6095 // Assign locations to all of the outgoing aggregate by value arguments.
6096 SmallVector<CCValAssign, 16> ByValArgLocs;
6097 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6098
6099 // Reserve stack space for the allocations in CCInfo.
6100 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6101
6102 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6103
6104 // Size of the linkage area, parameter list area and the part of the local
6105 // variable space where copies of aggregates which are passed by value are
6106 // stored.
6107 unsigned NumBytes = CCByValInfo.getStackSize();
6108
6109 // Calculate by how many bytes the stack has to be adjusted in case of tail
6110 // call optimization.
6111 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6112
6113 // Adjust the stack pointer for the new arguments...
6114 // These operations are automatically eliminated by the prolog/epilog pass
6115 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6116 SDValue CallSeqStart = Chain;
6117
6118 // Load the return address and frame pointer so they can be moved somewhere
6119 // else later.
6120 SDValue LROp, FPOp;
6121 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6122
6123 // Set up a copy of the stack pointer for use loading and storing any
6124 // arguments that may not fit in the registers available for argument
6125 // passing.
6126 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6127
6129 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6130 SmallVector<SDValue, 8> MemOpChains;
6131
6132 bool seenFloatArg = false;
6133 // Walk the register/memloc assignments, inserting copies/loads.
6134 // i - Tracks the index into the list of registers allocated for the call
6135 // RealArgIdx - Tracks the index into the list of actual function arguments
6136 // j - Tracks the index into the list of byval arguments
6137 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6138 i != e;
6139 ++i, ++RealArgIdx) {
6140 CCValAssign &VA = ArgLocs[i];
6141 SDValue Arg = OutVals[RealArgIdx];
6142 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6143
6144 if (Flags.isByVal()) {
6145 // Argument is an aggregate which is passed by value, thus we need to
6146 // create a copy of it in the local variable space of the current stack
6147 // frame (which is the stack frame of the caller) and pass the address of
6148 // this copy to the callee.
6149 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6150 CCValAssign &ByValVA = ByValArgLocs[j++];
6151 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6152
6153 // Memory reserved in the local variable space of the caller's stack frame.
6154 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6155
6156 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6157 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6158 StackPtr, PtrOff);
6159
6160 // Create a copy of the argument in the local area of the current
6161 // stack frame.
6162 SDValue MemcpyCall =
6163 CreateCopyOfByValArgument(Arg, PtrOff,
6164 CallSeqStart.getNode()->getOperand(0),
6165 Flags, DAG, dl);
6166
6167 // This must go outside the CALLSEQ_START..END.
6168 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6169 SDLoc(MemcpyCall));
6170 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6171 NewCallSeqStart.getNode());
6172 Chain = CallSeqStart = NewCallSeqStart;
6173
6174 // Pass the address of the aggregate copy on the stack either in a
6175 // physical register or in the parameter list area of the current stack
6176 // frame to the callee.
6177 Arg = PtrOff;
6178 }
6179
6180 // When useCRBits() is true, there can be i1 arguments.
6181 // This is because getRegisterType(MVT::i1) => MVT::i1,
6182 // while for other integer types getRegisterType() => MVT::i32.
6183 // Extend i1 values and ensure the callee will get an i32.
6184 if (Arg.getValueType() == MVT::i1)
6185 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6186 dl, MVT::i32, Arg);
6187
6188 if (VA.isRegLoc()) {
6189 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6190 // Put argument in a physical register.
6191 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6192 bool IsLE = Subtarget.isLittleEndian();
6193 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6194 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6195 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6196 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6197 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6198 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6199 SVal.getValue(0)));
6200 } else
6201 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6202 } else {
6203 // Put argument in the parameter list area of the current stack frame.
6204 assert(VA.isMemLoc());
6205 unsigned LocMemOffset = VA.getLocMemOffset();
6206
6207 if (!IsTailCall) {
6208 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6209 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6210 StackPtr, PtrOff);
6211
6212 MemOpChains.push_back(
6213 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6214 } else {
6215 // Calculate and remember argument location.
6216 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6217 TailCallArguments);
6218 }
6219 }
6220 }
6221
6222 if (!MemOpChains.empty())
6223 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6224
6225 // Build a sequence of copy-to-reg nodes chained together with token chain
6226 // and flag operands which copy the outgoing args into the appropriate regs.
6227 SDValue InGlue;
6228 for (const auto &[Reg, N] : RegsToPass) {
6229 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6230 InGlue = Chain.getValue(1);
6231 }
6232
6233 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6234 // registers.
6235 if (IsVarArg) {
6236 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6237 SDValue Ops[] = { Chain, InGlue };
6238
6239 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6240 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6241
6242 InGlue = Chain.getValue(1);
6243 }
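// Example (illustrative): for a 32-bit SVR4 call such as printf("%f\n", d), the
// double is passed in an FPR, so CR6SET is emitted and the callee's vararg
// prologue knows it must save the FP argument registers; a variadic call with
// only integer arguments gets CR6UNSET instead.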
6244
6245 if (IsTailCall)
6246 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6247 TailCallArguments);
6248
6249 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6250 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6251}
6252
6253// Copy an argument into memory, being careful to do this outside the
6254// call sequence for the call to which the argument belongs.
6255SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6256 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6257 SelectionDAG &DAG, const SDLoc &dl) const {
6258 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6259 CallSeqStart.getNode()->getOperand(0),
6260 Flags, DAG, dl);
6261 // The MEMCPY must go outside the CALLSEQ_START..END.
6262 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6263 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6264 SDLoc(MemcpyCall));
6265 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6266 NewCallSeqStart.getNode());
6267 return NewCallSeqStart;
6268}
6269
6270SDValue PPCTargetLowering::LowerCall_64SVR4(
6271 SDValue Chain, SDValue Callee, CallFlags CFlags,
6273 const SmallVectorImpl<SDValue> &OutVals,
6274 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6276 const CallBase *CB) const {
6277 bool isELFv2ABI = Subtarget.isELFv2ABI();
6278 bool isLittleEndian = Subtarget.isLittleEndian();
6279 unsigned NumOps = Outs.size();
6280 bool IsSibCall = false;
6281 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6282
6283 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6284 unsigned PtrByteSize = 8;
6285
6287
6288 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6289 IsSibCall = true;
6290
6291 // Mark this function as potentially containing a function that contains a
6292 // tail call. As a consequence the frame pointer will be used for dynamic
6293 // stack allocation and for restoring the caller's stack pointer in this
6294 // function's epilogue. This is done because the tail-called function might
6295 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6296 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6297 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6298
6299 assert(!(IsFastCall && CFlags.IsVarArg) &&
6300 "fastcc not supported on varargs functions");
6301
6302 // Count how many bytes are to be pushed on the stack, including the linkage
6303 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6304 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6305 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
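// Linkage-area offset sketch (for illustration only; the authoritative numbers
// come from PPCFrameLowering, e.g. getLinkageSize() and getTOCSaveOffset()):
//   ELFv1: 0 back chain, 8 CR save, 16 LR save, 24/32 reserved, 40 TOC save;
//          the parameter save area starts at offset 48.
//   ELFv2: 0 back chain, 8 CR save, 16 LR save, 24 TOC save;
//          the parameter save area starts at offset 32.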
6306 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6307 unsigned NumBytes = LinkageSize;
6308 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6309
6310 static const MCPhysReg GPR[] = {
6311 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6312 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6313 };
6314 static const MCPhysReg VR[] = {
6315 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6316 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6317 };
6318
6319 const unsigned NumGPRs = std::size(GPR);
6320 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6321 const unsigned NumVRs = std::size(VR);
6322
6323 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6324 // can be passed to the callee in registers.
6325 // For the fast calling convention, there is another check below.
6326 // Note: We should keep this consistent with LowerFormalArguments_64SVR4()
6327 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6328 if (!HasParameterArea) {
6329 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6330 unsigned AvailableFPRs = NumFPRs;
6331 unsigned AvailableVRs = NumVRs;
6332 unsigned NumBytesTmp = NumBytes;
6333 for (unsigned i = 0; i != NumOps; ++i) {
6334 if (Outs[i].Flags.isNest()) continue;
6335 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6336 PtrByteSize, LinkageSize, ParamAreaSize,
6337 NumBytesTmp, AvailableFPRs, AvailableVRs))
6338 HasParameterArea = true;
6339 }
6340 }
6341
6342 // When using the fast calling convention, we don't provide backing for
6343 // arguments that will be in registers.
6344 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6345
6346 // Avoid allocating parameter area for fastcc functions if all the arguments
6347 // can be passed in the registers.
6348 if (IsFastCall)
6349 HasParameterArea = false;
6350
6351 // Add up all the space actually used.
6352 for (unsigned i = 0; i != NumOps; ++i) {
6353 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6354 EVT ArgVT = Outs[i].VT;
6355 EVT OrigVT = Outs[i].ArgVT;
6356
6357 if (Flags.isNest())
6358 continue;
6359
6360 if (IsFastCall) {
6361 if (Flags.isByVal()) {
6362 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6363 if (NumGPRsUsed > NumGPRs)
6364 HasParameterArea = true;
6365 } else {
6366 switch (ArgVT.getSimpleVT().SimpleTy) {
6367 default: llvm_unreachable("Unexpected ValueType for argument!");
6368 case MVT::i1:
6369 case MVT::i32:
6370 case MVT::i64:
6371 if (++NumGPRsUsed <= NumGPRs)
6372 continue;
6373 break;
6374 case MVT::v4i32:
6375 case MVT::v8i16:
6376 case MVT::v16i8:
6377 case MVT::v2f64:
6378 case MVT::v2i64:
6379 case MVT::v1i128:
6380 case MVT::f128:
6381 if (++NumVRsUsed <= NumVRs)
6382 continue;
6383 break;
6384 case MVT::v4f32:
6385 if (++NumVRsUsed <= NumVRs)
6386 continue;
6387 break;
6388 case MVT::f32:
6389 case MVT::f64:
6390 if (++NumFPRsUsed <= NumFPRs)
6391 continue;
6392 break;
6393 }
6394 HasParameterArea = true;
6395 }
6396 }
6397
6398 /* Respect alignment of argument on the stack. */
6399 auto Alignment =
6400 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6401 NumBytes = alignTo(NumBytes, Alignment);
6402
6403 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6404 if (Flags.isInConsecutiveRegsLast())
6405 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6406 }
6407
6408 unsigned NumBytesActuallyUsed = NumBytes;
6409
6410 // In the old ELFv1 ABI,
6411 // the prologue code of the callee may store up to 8 GPR argument registers to
6412 // the stack, allowing va_start to index over them in memory if it is varargs.
6413 // Because we cannot tell if this is needed on the caller side, we have to
6414 // conservatively assume that it is needed. As such, make sure we have at
6415 // least enough stack space for the caller to store the 8 GPRs.
6416 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6417 // really requires memory operands, e.g. a vararg function.
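// Example (illustrative): for a varargs callee such as
//   int sum(int n, ...);
// built against ELFv1, the callee's prologue may spill r3-r10 into the first
// 64 bytes of the parameter save area so va_arg can walk the arguments in
// memory, which is why the caller always reserves that space.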
6418 if (HasParameterArea)
6419 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6420 else
6421 NumBytes = LinkageSize;
6422
6423 // Tail call needs the stack to be aligned.
6424 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6425 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6426
6427 int SPDiff = 0;
6428
6429 // Calculate by how many bytes the stack has to be adjusted in case of tail
6430 // call optimization.
6431 if (!IsSibCall)
6432 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6433
6434 // To protect arguments on the stack from being clobbered in a tail call,
6435 // force all the loads to happen before doing any other lowering.
6436 if (CFlags.IsTailCall)
6437 Chain = DAG.getStackArgumentTokenFactor(Chain);
6438
6439 // Adjust the stack pointer for the new arguments...
6440 // These operations are automatically eliminated by the prolog/epilog pass
6441 if (!IsSibCall)
6442 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6443 SDValue CallSeqStart = Chain;
6444
6445 // Load the return address and frame pointer so they can be moved somewhere
6446 // else later.
6447 SDValue LROp, FPOp;
6448 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6449
6450 // Set up a copy of the stack pointer for use loading and storing any
6451 // arguments that may not fit in the registers available for argument
6452 // passing.
6453 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6454
6455 // Figure out which arguments are going to go in registers, and which in
6456 // memory. Also, if this is a vararg function, floating-point arguments
6457 // must be stored to our stack, and loaded into integer regs as well, if
6458 // any integer regs are available for argument passing.
6459 unsigned ArgOffset = LinkageSize;
6460
6462 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6463
6464 SmallVector<SDValue, 8> MemOpChains;
6465 for (unsigned i = 0; i != NumOps; ++i) {
6466 SDValue Arg = OutVals[i];
6467 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6468 EVT ArgVT = Outs[i].VT;
6469 EVT OrigVT = Outs[i].ArgVT;
6470
6471 // PtrOff will be used to store the current argument to the stack if a
6472 // register cannot be found for it.
6473 SDValue PtrOff;
6474
6475 // We re-align the argument offset for each argument, except when using the
6476 // fast calling convention, in which case we make sure to do that only when
6477 // we'll actually use a stack slot.
6478 auto ComputePtrOff = [&]() {
6479 /* Respect alignment of argument on the stack. */
6480 auto Alignment =
6481 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6482 ArgOffset = alignTo(ArgOffset, Alignment);
6483
6484 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6485
6486 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6487 };
6488
6489 if (!IsFastCall) {
6490 ComputePtrOff();
6491
6492 /* Compute GPR index associated with argument offset. */
6493 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6494 GPR_idx = std::min(GPR_idx, NumGPRs);
6495 }
6496
6497 // Promote integers to 64-bit values.
6498 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6499 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6500 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6501 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6502 }
6503
6504 // FIXME memcpy is used way more than necessary. Correctness first.
6505 // Note: "by value" is code for passing a structure by value, not
6506 // basic types.
6507 if (Flags.isByVal()) {
6508 // Note: Size includes alignment padding, so
6509 // struct x { short a; char b; }
6510 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6511 // These are the proper values we need for right-justifying the
6512 // aggregate in a parameter register.
6513 unsigned Size = Flags.getByValSize();
6514
6515 // An empty aggregate parameter takes up no storage and no
6516 // registers.
6517 if (Size == 0)
6518 continue;
6519
6520 if (IsFastCall)
6521 ComputePtrOff();
6522
6523 // All aggregates smaller than 8 bytes must be passed right-justified.
6524 if (Size==1 || Size==2 || Size==4) {
6525 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6526 if (GPR_idx != NumGPRs) {
6527 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6528 MachinePointerInfo(), VT);
6529 MemOpChains.push_back(Load.getValue(1));
6530 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6531
6532 ArgOffset += PtrByteSize;
6533 continue;
6534 }
6535 }
6536
6537 if (GPR_idx == NumGPRs && Size < 8) {
6538 SDValue AddPtr = PtrOff;
6539 if (!isLittleEndian) {
6540 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6541 PtrOff.getValueType());
6542 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6543 }
6544 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6545 CallSeqStart,
6546 Flags, DAG, dl);
6547 ArgOffset += PtrByteSize;
6548 continue;
6549 }
6550 // Copy the object to the parameter save area if it cannot be entirely
6551 // passed in registers.
6552 // FIXME: we only need to copy the parts which need to be passed in
6553 // parameter save area. For the parts passed by registers, we don't need
6554 // to copy them to the stack although we need to allocate space for them
6555 // in parameter save area.
6556 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6557 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6558 CallSeqStart,
6559 Flags, DAG, dl);
6560
6561 // When a register is available, pass a small aggregate right-justified.
6562 if (Size < 8 && GPR_idx != NumGPRs) {
6563 // The easiest way to get this right-justified in a register
6564 // is to copy the structure into the rightmost portion of a
6565 // local variable slot, then load the whole slot into the
6566 // register.
6567 // FIXME: The memcpy seems to produce pretty awful code for
6568 // small aggregates, particularly for packed ones.
6569 // FIXME: It would be preferable to use the slot in the
6570 // parameter save area instead of a new local variable.
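// Illustration (big-endian; a sketch derived from the code below): for
//   struct s3 { char c[3]; };
// the three bytes are memcpy'd to offsets 5..7 of the 8-byte slot (AddPtr is
// PtrOff + (8 - Size)), so the following doubleword load leaves the aggregate
// right-justified in the low-order bytes of the GPR.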
6571 SDValue AddPtr = PtrOff;
6572 if (!isLittleEndian) {
6573 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6574 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6575 }
6576 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6577 CallSeqStart,
6578 Flags, DAG, dl);
6579
6580 // Load the slot into the register.
6581 SDValue Load =
6582 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6583 MemOpChains.push_back(Load.getValue(1));
6584 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6585
6586 // Done with this argument.
6587 ArgOffset += PtrByteSize;
6588 continue;
6589 }
6590
6591 // For aggregates larger than PtrByteSize, copy the pieces of the
6592 // object that fit into registers from the parameter save area.
6593 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6594 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6595 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6596 if (GPR_idx != NumGPRs) {
6597 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6598 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6599 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6600 MachinePointerInfo(), ObjType);
6601
6602 MemOpChains.push_back(Load.getValue(1));
6603 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6604 ArgOffset += PtrByteSize;
6605 } else {
6606 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6607 break;
6608 }
6609 }
6610 continue;
6611 }
6612
6613 switch (Arg.getSimpleValueType().SimpleTy) {
6614 default: llvm_unreachable("Unexpected ValueType for argument!");
6615 case MVT::i1:
6616 case MVT::i32:
6617 case MVT::i64:
6618 if (Flags.isNest()) {
6619 // The 'nest' parameter, if any, is passed in R11.
6620 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6621 break;
6622 }
6623
6624 // These can be scalar arguments or elements of an integer array type
6625 // passed directly. Clang may use those instead of "byval" aggregate
6626 // types to avoid forcing arguments to memory unnecessarily.
6627 if (GPR_idx != NumGPRs) {
6628 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6629 } else {
6630 if (IsFastCall)
6631 ComputePtrOff();
6632
6633 assert(HasParameterArea &&
6634 "Parameter area must exist to pass an argument in memory.");
6635 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6636 true, CFlags.IsTailCall, false, MemOpChains,
6637 TailCallArguments, dl);
6638 if (IsFastCall)
6639 ArgOffset += PtrByteSize;
6640 }
6641 if (!IsFastCall)
6642 ArgOffset += PtrByteSize;
6643 break;
6644 case MVT::f32:
6645 case MVT::f64: {
6646 // These can be scalar arguments or elements of a float array type
6647 // passed directly. The latter are used to implement ELFv2 homogenous
6648 // float aggregates.
6649
6650 // Named arguments go into FPRs first, and once they overflow, the
6651 // remaining arguments go into GPRs and then the parameter save area.
6652 // Unnamed arguments for vararg functions always go to GPRs and
6653 // then the parameter save area. For now, put all arguments to vararg
6654 // routines always in both locations (FPR *and* GPR or stack slot).
6655 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6656 bool NeededLoad = false;
6657
6658 // First load the argument into the next available FPR.
6659 if (FPR_idx != NumFPRs)
6660 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6661
6662 // Next, load the argument into GPR or stack slot if needed.
6663 if (!NeedGPROrStack)
6664 ;
6665 else if (GPR_idx != NumGPRs && !IsFastCall) {
6666 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6667 // once we support fp <-> gpr moves.
6668
6669 // In the non-vararg case, this can only ever happen in the
6670 // presence of f32 array types, since otherwise we never run
6671 // out of FPRs before running out of GPRs.
6672 SDValue ArgVal;
6673
6674 // Double values are always passed in a single GPR.
6675 if (Arg.getValueType() != MVT::f32) {
6676 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6677
6678 // Non-array float values are extended and passed in a GPR.
6679 } else if (!Flags.isInConsecutiveRegs()) {
6680 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6681 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6682
6683 // If we have an array of floats, we collect every odd element
6684 // together with its predecessor into one GPR.
6685 } else if (ArgOffset % PtrByteSize != 0) {
6686 SDValue Lo, Hi;
6687 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6688 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6689 if (!isLittleEndian)
6690 std::swap(Lo, Hi);
6691 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6692
6693 // The final element, if even, goes into the first half of a GPR.
6694 } else if (Flags.isInConsecutiveRegsLast()) {
6695 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6696 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6697 if (!isLittleEndian)
6698 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6699 DAG.getConstant(32, dl, MVT::i32));
6700
6701 // Non-final even elements are skipped; they will be handled
6702 // together with the subsequent argument on the next go-around.
6703 } else
6704 ArgVal = SDValue();
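// Illustrative example (editorial sketch, not from the upstream source):
// for an ELFv2 homogeneous float aggregate such as float a[4] whose
// elements spill out of the FPRs, a[0]/a[1] and a[2]/a[3] are each packed
// into one 64-bit GPR with BUILD_PAIR; on big-endian targets the
// lower-indexed element of each pair lands in the high 32 bits, matching
// the in-memory layout of the array.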
6705
6706 if (ArgVal.getNode())
6707 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6708 } else {
6709 if (IsFastCall)
6710 ComputePtrOff();
6711
6712 // Single-precision floating-point values are mapped to the
6713 // second (rightmost) word of the stack doubleword.
6714 if (Arg.getValueType() == MVT::f32 &&
6715 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6716 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6717 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6718 }
6719
6720 assert(HasParameterArea &&
6721 "Parameter area must exist to pass an argument in memory.");
6722 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6723 true, CFlags.IsTailCall, false, MemOpChains,
6724 TailCallArguments, dl);
6725
6726 NeededLoad = true;
6727 }
6728 // When passing an array of floats, the array occupies consecutive
6729 // space in the argument area; only round up to the next doubleword
6730 // at the end of the array. Otherwise, each float takes 8 bytes.
6731 if (!IsFastCall || NeededLoad) {
6732 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6733 Flags.isInConsecutiveRegs()) ? 4 : 8;
6734 if (Flags.isInConsecutiveRegsLast())
6735 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6736 }
6737 break;
6738 }
6739 case MVT::v4f32:
6740 case MVT::v4i32:
6741 case MVT::v8i16:
6742 case MVT::v16i8:
6743 case MVT::v2f64:
6744 case MVT::v2i64:
6745 case MVT::v1i128:
6746 case MVT::f128:
6747 // These can be scalar arguments or elements of a vector array type
6748 // passed directly. The latter are used to implement ELFv2 homogenous
6749 // vector aggregates.
6750
6751 // For a varargs call, named arguments go into VRs or on the stack as
6752 // usual; unnamed arguments always go to the stack or the corresponding
6753 // GPRs when within range. For now, we always put the value in both
6754 // locations (or even all three).
6755 if (CFlags.IsVarArg) {
6756 assert(HasParameterArea &&
6757 "Parameter area must exist if we have a varargs call.");
6758 // We could elide this store in the case where the object fits
6759 // entirely in R registers. Maybe later.
6760 SDValue Store =
6761 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6762 MemOpChains.push_back(Store);
6763 if (VR_idx != NumVRs) {
6764 SDValue Load =
6765 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6766 MemOpChains.push_back(Load.getValue(1));
6767 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6768 }
6769 ArgOffset += 16;
6770 for (unsigned i=0; i<16; i+=PtrByteSize) {
6771 if (GPR_idx == NumGPRs)
6772 break;
6773 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6774 DAG.getConstant(i, dl, PtrVT));
6775 SDValue Load =
6776 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6777 MemOpChains.push_back(Load.getValue(1));
6778 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6779 }
6780 break;
6781 }
6782
6783 // Non-varargs Altivec params go into VRs or on the stack.
6784 if (VR_idx != NumVRs) {
6785 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6786 } else {
6787 if (IsFastCall)
6788 ComputePtrOff();
6789
6790 assert(HasParameterArea &&
6791 "Parameter area must exist to pass an argument in memory.");
6792 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6793 true, CFlags.IsTailCall, true, MemOpChains,
6794 TailCallArguments, dl);
6795 if (IsFastCall)
6796 ArgOffset += 16;
6797 }
6798
6799 if (!IsFastCall)
6800 ArgOffset += 16;
6801 break;
6802 }
6803 }
6804
6805 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6806 "mismatch in size of parameter area");
6807 (void)NumBytesActuallyUsed;
6808
6809 if (!MemOpChains.empty())
6810 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6811
6812 // Check if this is an indirect call (MTCTR/BCTRL).
6813 // See prepareDescriptorIndirectCall and buildCallOperands for more
6814 // information about calls through function pointers in the 64-bit SVR4 ABI.
6815 if (CFlags.IsIndirect) {
6816 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6817 // caller in the TOC save area.
6818 if (isTOCSaveRestoreRequired(Subtarget)) {
6819 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6820 // Load r2 into a virtual register and store it to the TOC save area.
6821 setUsesTOCBasePtr(DAG);
6822 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6823 // TOC save area offset.
6824 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6825 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6826 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6827 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6828 MachinePointerInfo::getStack(
6829 DAG.getMachineFunction(), TOCSaveOffset));
6830 }
6831 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6832 // This does not mean the MTCTR instruction must use R12; it's easier
6833 // to model this as an extra parameter, so do that.
6834 if (isELFv2ABI && !CFlags.IsPatchPoint)
6835 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6836 }
6837
6838 // Build a sequence of copy-to-reg nodes chained together with token chain
6839 // and flag operands which copy the outgoing args into the appropriate regs.
6840 SDValue InGlue;
6841 for (const auto &[Reg, N] : RegsToPass) {
6842 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6843 InGlue = Chain.getValue(1);
6844 }
6845
6846 if (CFlags.IsTailCall && !IsSibCall)
6847 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6848 TailCallArguments);
6849
6850 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6851 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6852}
6853
6854// Returns true when the shadow of a general purpose argument register
6855// in the parameter save area is aligned to at least 'RequiredAlign'.
6856static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6857 assert(RequiredAlign.value() <= 16 &&
6858 "Required alignment greater than stack alignment.");
6859 switch (Reg) {
6860 default:
6861 report_fatal_error("called on invalid register.");
6862 case PPC::R5:
6863 case PPC::R9:
6864 case PPC::X3:
6865 case PPC::X5:
6866 case PPC::X7:
6867 case PPC::X9:
6868 // These registers are 16 byte aligned, which is the strictest alignment
6869 // we can support.
6870 return true;
6871 case PPC::R3:
6872 case PPC::R7:
6873 case PPC::X4:
6874 case PPC::X6:
6875 case PPC::X8:
6876 case PPC::X10:
6877 // The shadow of these registers in the PSA is 8 byte aligned.
6878 return RequiredAlign <= 8;
6879 case PPC::R4:
6880 case PPC::R6:
6881 case PPC::R8:
6882 case PPC::R10:
6883 return RequiredAlign <= 4;
6884 }
6885}
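// Worked example for the function above (illustration only, assuming the
// usual 16-byte stack alignment and the 24/48-byte AIX linkage areas): the
// PSA begins right after the linkage area, so on 64-bit the shadow of X3
// sits at SP+48 (16-byte aligned), X4 at SP+56 (8-byte aligned) and X5 at
// SP+64 (16-byte aligned); on 32-bit, R3 shadows SP+24 (8-byte aligned),
// R4 SP+28 (4-byte aligned) and R5 SP+32 (16-byte aligned), which is
// exactly the pattern encoded by the switch.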
6886
6887static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6888 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6889 Type *OrigTy, CCState &State) {
6890 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6891 State.getMachineFunction().getSubtarget());
6892 const bool IsPPC64 = Subtarget.isPPC64();
6893 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6894 const Align PtrAlign(PtrSize);
6895 const Align StackAlign(16);
6896 const MVT RegVT = Subtarget.getScalarIntVT();
6897
6898 if (ValVT == MVT::f128)
6899 report_fatal_error("f128 is unimplemented on AIX.");
6900
6901 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6902 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6903 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6904 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6905 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6906 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6907
6908 static const MCPhysReg VR[] = {// Vector registers.
6909 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6910 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6911 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6912
6913 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6914
6915 if (ArgFlags.isNest()) {
6916 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6917 if (!EnvReg)
6918 report_fatal_error("More than one nest argument.");
6919 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6920 return false;
6921 }
6922
6923 if (ArgFlags.isByVal()) {
6924 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6925 if (ByValAlign > StackAlign)
6926 report_fatal_error("Pass-by-value arguments with alignment greater than "
6927 "16 are not supported.");
6928
6929 const unsigned ByValSize = ArgFlags.getByValSize();
6930 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6931
6932 // An empty aggregate parameter takes up no storage and no registers,
6933 // but needs a MemLoc for a stack slot for the formal arguments side.
6934 if (ByValSize == 0) {
6935 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6936 State.getStackSize(), RegVT, LocInfo));
6937 return false;
6938 }
6939
6940 // Shadow allocate any registers that are not properly aligned.
6941 unsigned NextReg = State.getFirstUnallocated(GPRs);
6942 while (NextReg != GPRs.size() &&
6943 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6944 // Shadow allocate the next register since its alignment is not strict enough.
6945 MCRegister Reg = State.AllocateReg(GPRs);
6946 // Allocate the stack space shadowed by said register.
6947 State.AllocateStack(PtrSize, PtrAlign);
6948 assert(Reg && "Allocating register unexpectedly failed.");
6949 (void)Reg;
6950 NextReg = State.getFirstUnallocated(GPRs);
6951 }
6952
6953 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6954 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6955 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6956 if (MCRegister Reg = State.AllocateReg(GPRs))
6957 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6958 else {
6959 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6960 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6961 LocInfo));
6962 break;
6963 }
6964 }
6965 return false;
6966 }
6967
6968 // Arguments always reserve parameter save area.
6969 switch (ValVT.SimpleTy) {
6970 default:
6971 report_fatal_error("Unhandled value type for argument.");
6972 case MVT::i64:
6973 // i64 arguments should have been split to i32 for PPC32.
6974 assert(IsPPC64 && "PPC32 should have split i64 values.");
6975 [[fallthrough]];
6976 case MVT::i1:
6977 case MVT::i32: {
6978 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6979 // AIX integer arguments are always passed in register width.
6980 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6981 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6982 : CCValAssign::LocInfo::ZExt;
6983 if (MCRegister Reg = State.AllocateReg(GPRs))
6984 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6985 else
6986 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6987
6988 return false;
6989 }
6990 case MVT::f32:
6991 case MVT::f64: {
6992 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6993 const unsigned StoreSize = LocVT.getStoreSize();
6994 // Floats are always 4-byte aligned in the PSA on AIX.
6995 // This includes f64 in 64-bit mode for ABI compatibility.
6996 const unsigned Offset =
6997 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6998 MCRegister FReg = State.AllocateReg(FPR);
6999 if (FReg)
7000 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7001
7002 // Reserve and initialize GPRs or initialize the PSA as required.
7003 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
7004 if (MCRegister Reg = State.AllocateReg(GPRs)) {
7005 assert(FReg && "An FPR should be available when a GPR is reserved.");
7006 if (State.isVarArg()) {
7007 // Successfully reserved GPRs are only initialized for vararg calls.
7008 // Custom handling is required for:
7009 // f64 in PPC32 needs to be split into 2 GPRs.
7010 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7011 State.addLoc(
7012 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7013 }
7014 } else {
7015 // If there are insufficient GPRs, the PSA needs to be initialized.
7016 // Initialization occurs even if an FPR was initialized for
7017 // compatibility with the AIX XL compiler. The full memory for the
7018 // argument will be initialized even if a prior word is saved in GPR.
7019 // A custom memLoc is used when the argument also passes in FPR so
7020 // that the callee handling can skip over it easily.
7021 State.addLoc(
7022 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7023 LocInfo)
7024 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7025 break;
7026 }
7027 }
7028
7029 return false;
7030 }
7031 case MVT::v4f32:
7032 case MVT::v4i32:
7033 case MVT::v8i16:
7034 case MVT::v16i8:
7035 case MVT::v2i64:
7036 case MVT::v2f64:
7037 case MVT::v1i128: {
7038 const unsigned VecSize = 16;
7039 const Align VecAlign(VecSize);
7040
7041 if (!State.isVarArg()) {
7042 // If there are vector registers remaining we don't consume any stack
7043 // space.
7044 if (MCRegister VReg = State.AllocateReg(VR)) {
7045 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7046 return false;
7047 }
7048 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7049 // might be allocated in the portion of the PSA that is shadowed by the
7050 // GPRs.
7051 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7052 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7053 return false;
7054 }
7055
7056 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7057 // Burn any underaligned registers and their shadowed stack space until
7058 // we reach the required alignment.
7059 while (NextRegIndex != GPRs.size() &&
7060 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7061 // Shadow allocate register and its stack shadow.
7062 MCRegister Reg = State.AllocateReg(GPRs);
7063 State.AllocateStack(PtrSize, PtrAlign);
7064 assert(Reg && "Allocating register unexpectedly failed.");
7065 (void)Reg;
7066 NextRegIndex = State.getFirstUnallocated(GPRs);
7067 }
7068
7069 // Vectors that are passed as fixed arguments are handled differently.
7070 // They are passed in VRs if any are available (unlike arguments passed
7071 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
7072 // functions).
7073 if (!ArgFlags.isVarArg()) {
7074 if (MCRegister VReg = State.AllocateReg(VR)) {
7075 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7076 // Shadow allocate GPRs and stack space even though we pass in a VR.
7077 for (unsigned I = 0; I != VecSize; I += PtrSize)
7078 State.AllocateReg(GPRs);
7079 State.AllocateStack(VecSize, VecAlign);
7080 return false;
7081 }
7082 // No vector registers remain so pass on the stack.
7083 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7084 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7085 return false;
7086 }
7087
7088 // If all GPRS are consumed then we pass the argument fully on the stack.
7089 if (NextRegIndex == GPRs.size()) {
7090 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7091 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7092 return false;
7093 }
7094
7095 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7096 // half of the argument, and then need to pass the remaining half on the
7097 // stack.
7098 if (GPRs[NextRegIndex] == PPC::R9) {
7099 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7100 State.addLoc(
7101 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7102
7103 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7104 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7105 assert(FirstReg && SecondReg &&
7106 "Allocating R9 or R10 unexpectedly failed.");
7107 State.addLoc(
7108 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7109 State.addLoc(
7110 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7111 return false;
7112 }
7113
7114 // We have enough GPRs to fully pass the vector argument, and we have
7115 // already consumed any underaligned registers. Start with the custom
7116 // MemLoc and then the custom RegLocs.
7117 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7118 State.addLoc(
7119 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7120 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7121 const MCRegister Reg = State.AllocateReg(GPRs);
7122 assert(Reg && "Failed to allocate register for vararg vector argument");
7123 State.addLoc(
7124 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7125 }
7126 return false;
7127 }
7128 }
7129 return true;
7130}
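// Worked example for CC_AIX (illustration only): a 12-byte by-value
// argument on 64-bit AIX with no over-alignment has ObjAlign = 8 and
// StackSize = alignTo(12, 8) = 16, so 16 bytes of PSA are reserved and,
// when available, two GPRs (e.g. X3 and X4 for the first argument) are
// assigned, each recorded as a RegLoc for the same ValNo.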
7131
7132 // So far, this function is only used by LowerFormalArguments_AIX().
7133 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7134 bool IsPPC64,
7135 bool HasP8Vector,
7136 bool HasVSX) {
7137 assert((IsPPC64 || SVT != MVT::i64) &&
7138 "i64 should have been split for 32-bit codegen.");
7139
7140 switch (SVT) {
7141 default:
7142 report_fatal_error("Unexpected value type for formal argument");
7143 case MVT::i1:
7144 case MVT::i32:
7145 case MVT::i64:
7146 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7147 case MVT::f32:
7148 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7149 case MVT::f64:
7150 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7151 case MVT::v4f32:
7152 case MVT::v4i32:
7153 case MVT::v8i16:
7154 case MVT::v16i8:
7155 case MVT::v2i64:
7156 case MVT::v2f64:
7157 case MVT::v1i128:
7158 return &PPC::VRRCRegClass;
7159 }
7160}
7161
7162 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7163 SelectionDAG &DAG, SDValue ArgValue,
7164 MVT LocVT, const SDLoc &dl) {
7165 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7166 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7167
7168 if (Flags.isSExt())
7169 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7170 DAG.getValueType(ValVT));
7171 else if (Flags.isZExt())
7172 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7173 DAG.getValueType(ValVT));
7174
7175 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7176}
7177
7178static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7179 const unsigned LASize = FL->getLinkageSize();
7180
7181 if (PPC::GPRCRegClass.contains(Reg)) {
7182 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7183 "Reg must be a valid argument register!");
7184 return LASize + 4 * (Reg - PPC::R3);
7185 }
7186
7187 if (PPC::G8RCRegClass.contains(Reg)) {
7188 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7189 "Reg must be a valid argument register!");
7190 return LASize + 8 * (Reg - PPC::X3);
7191 }
7192
7193 llvm_unreachable("Only general purpose registers expected.");
7194}
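// Worked example for the mapping above (illustration only): with the
// 48-byte 64-bit linkage area, X3 maps to offset 48, X4 to 56 and X5 to 64;
// with the 24-byte 32-bit linkage area, R3 maps to 24 and R5 to 32.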
7195
7196// AIX ABI Stack Frame Layout:
7197//
7198// Low Memory +--------------------------------------------+
7199// SP +---> | Back chain | ---+
7200// | +--------------------------------------------+ |
7201// | | Saved Condition Register | |
7202// | +--------------------------------------------+ |
7203// | | Saved Linkage Register | |
7204// | +--------------------------------------------+ | Linkage Area
7205// | | Reserved for compilers | |
7206// | +--------------------------------------------+ |
7207// | | Reserved for binders | |
7208// | +--------------------------------------------+ |
7209// | | Saved TOC pointer | ---+
7210// | +--------------------------------------------+
7211// | | Parameter save area |
7212// | +--------------------------------------------+
7213// | | Alloca space |
7214// | +--------------------------------------------+
7215// | | Local variable space |
7216// | +--------------------------------------------+
7217// | | Float/int conversion temporary |
7218// | +--------------------------------------------+
7219// | | Save area for AltiVec registers |
7220// | +--------------------------------------------+
7221// | | AltiVec alignment padding |
7222// | +--------------------------------------------+
7223// | | Save area for VRSAVE register |
7224// | +--------------------------------------------+
7225// | | Save area for General Purpose registers |
7226// | +--------------------------------------------+
7227// | | Save area for Floating Point registers |
7228// | +--------------------------------------------+
7229// +---- | Back chain |
7230// High Memory +--------------------------------------------+
7231//
7232// Specifications:
7233// AIX 7.2 Assembler Language Reference
7234// Subroutine linkage convention
7235
7236SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7237 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7238 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7239 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7240
7241 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7242 CallConv == CallingConv::Fast) &&
7243 "Unexpected calling convention!");
7244
7245 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7246 report_fatal_error("Tail call support is unimplemented on AIX.");
7247
7248 if (useSoftFloat())
7249 report_fatal_error("Soft float support is unimplemented on AIX.");
7250
7251 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7252
7253 const bool IsPPC64 = Subtarget.isPPC64();
7254 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7255
7256 // Assign locations to all of the incoming arguments.
7257 SmallVector<CCValAssign, 16> ArgLocs;
7258 MachineFunction &MF = DAG.getMachineFunction();
7259 MachineFrameInfo &MFI = MF.getFrameInfo();
7260 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7261 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7262
7263 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7264 // Reserve space for the linkage area on the stack.
7265 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7266 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7267 uint64_t SaveStackPos = CCInfo.getStackSize();
7268 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7269 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7270
7271 SmallVector<SDValue, 8> MemOps;
7272
7273 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7274 CCValAssign &VA = ArgLocs[I++];
7275 MVT LocVT = VA.getLocVT();
7276 MVT ValVT = VA.getValVT();
7277 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7278
7279 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7280 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7281 // For compatibility with the AIX XL compiler, the float args in the
7282 // parameter save area are initialized even if the argument is available
7283 // in register. The caller is required to initialize both the register
7284 // and memory, however, the callee can choose to expect it in either.
7285 // The memloc is dismissed here because the argument is retrieved from
7286 // the register.
7287 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7288 continue;
7289
7290 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7291 const TargetRegisterClass *RegClass = getRegClassForSVT(
7292 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7293 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7294 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7295 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7296 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7297 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7298 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7299 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7300 MachinePointerInfo(), Align(PtrByteSize));
7301 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7302 MemOps.push_back(StoreReg);
7303 }
7304
7305 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7306 unsigned StoreSize =
7307 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7308 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7309 }
7310
7311 auto HandleMemLoc = [&]() {
7312 const unsigned LocSize = LocVT.getStoreSize();
7313 const unsigned ValSize = ValVT.getStoreSize();
7314 assert((ValSize <= LocSize) &&
7315 "Object size is larger than size of MemLoc");
7316 int CurArgOffset = VA.getLocMemOffset();
7317 // Objects are right-justified because AIX is big-endian.
7318 if (LocSize > ValSize)
7319 CurArgOffset += LocSize - ValSize;
7320 // Potential tail calls could cause overwriting of argument stack slots.
7321 const bool IsImmutable =
7322 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7323 (CallConv == CallingConv::Fast));
7324 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7325 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7326 SDValue ArgValue =
7327 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7328
7329 // While the ABI specifies the argument type is (sign or zero) extended
7330 // out to register width, not all code is compliant. We truncate and
7331 // re-extend to be more forgiving of these callers when the argument type
7332 // is smaller than register width.
7333 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7334 ValVT.isInteger() &&
7335 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7336 // It is possible to have either real integer values
7337 // or integers that were not originally integers.
7338 // In the latter case, these could have come from structs,
7339 // and these integers would not have an extend on the parameter.
7340 // Since these types of integers do not have an extend specified
7341 // in the first place, the type of extend that we do should not matter.
7342 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7343 ? MVT::i8
7344 : ArgVT;
7345 SDValue ArgValueTrunc =
7346 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7347 SDValue ArgValueExt =
7348 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7349 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7350 InVals.push_back(ArgValueExt);
7351 } else {
7352 InVals.push_back(ArgValue);
7353 }
7354 };
7355
7356 // Vector arguments to VaArg functions are passed both on the stack, and
7357 // in any available GPRs. Load the value from the stack and add the GPRs
7358 // as live ins.
7359 if (VA.isMemLoc() && VA.needsCustom()) {
7360 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7361 assert(isVarArg && "Only use custom memloc for vararg.");
7362 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7363 // matching custom RegLocs.
7364 const unsigned OriginalValNo = VA.getValNo();
7365 (void)OriginalValNo;
7366
7367 auto HandleCustomVecRegLoc = [&]() {
7368 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7369 "Missing custom RegLoc.");
7370 VA = ArgLocs[I++];
7371 assert(VA.getValVT().isVector() &&
7372 "Unexpected Val type for custom RegLoc.");
7373 assert(VA.getValNo() == OriginalValNo &&
7374 "ValNo mismatch between custom MemLoc and RegLoc.");
7376 MF.addLiveIn(VA.getLocReg(),
7377 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7378 Subtarget.hasVSX()));
7379 };
7380
7381 HandleMemLoc();
7382 // In 64-bit there will be exactly 2 custom RegLocs that follow. In
7383 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7384 // R10.
7385 HandleCustomVecRegLoc();
7386 HandleCustomVecRegLoc();
7387
7388 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7389 // we passed the vector in R5, R6, R7 and R8.
7390 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7391 assert(!IsPPC64 &&
7392 "Only 2 custom RegLocs expected for 64-bit codegen.");
7393 HandleCustomVecRegLoc();
7394 HandleCustomVecRegLoc();
7395 }
7396
7397 continue;
7398 }
7399
7400 if (VA.isRegLoc()) {
7401 if (VA.getValVT().isScalarInteger())
7402 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7403 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7404 switch (VA.getValVT().SimpleTy) {
7405 default:
7406 report_fatal_error("Unhandled value type for argument.");
7407 case MVT::f32:
7408 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7409 break;
7410 case MVT::f64:
7411 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7412 break;
7413 }
7414 } else if (VA.getValVT().isVector()) {
7415 switch (VA.getValVT().SimpleTy) {
7416 default:
7417 report_fatal_error("Unhandled value type for argument.");
7418 case MVT::v16i8:
7419 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7420 break;
7421 case MVT::v8i16:
7422 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7423 break;
7424 case MVT::v4i32:
7425 case MVT::v2i64:
7426 case MVT::v1i128:
7427 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7428 break;
7429 case MVT::v4f32:
7430 case MVT::v2f64:
7431 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7432 break;
7433 }
7434 }
7435 }
7436
7437 if (Flags.isByVal() && VA.isMemLoc()) {
7438 const unsigned Size =
7439 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7440 PtrByteSize);
7441 const int FI = MF.getFrameInfo().CreateFixedObject(
7442 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7443 /* IsAliased */ true);
7444 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7445 InVals.push_back(FIN);
7446
7447 continue;
7448 }
7449
7450 if (Flags.isByVal()) {
7451 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7452
7453 const MCPhysReg ArgReg = VA.getLocReg();
7454 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7455
7456 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7457 const int FI = MF.getFrameInfo().CreateFixedObject(
7458 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7459 /* IsAliased */ true);
7460 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7461 InVals.push_back(FIN);
7462
7463 // Add live ins for all the RegLocs for the same ByVal.
7464 const TargetRegisterClass *RegClass =
7465 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7466
7467 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7468 unsigned Offset) {
7469 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7470 // Since the caller's side has left-justified the aggregate in the
7471 // register, we can simply store the entire register into the stack
7472 // slot.
7473 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7474 // The store to the fixed-stack object is needed because accessing a
7475 // field of the ByVal will use a GEP and load. Ideally we will optimize
7476 // to extracting the value from the register directly, and elide the
7477 // stores when the argument's address is not taken, but that will need to
7478 // be future work.
7479 SDValue Store = DAG.getStore(
7480 CopyFrom.getValue(1), dl, CopyFrom,
7481 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7482 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7483
7484 MemOps.push_back(Store);
7485 };
7486
7487 unsigned Offset = 0;
7488 HandleRegLoc(VA.getLocReg(), Offset);
7489 Offset += PtrByteSize;
7490 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7491 Offset += PtrByteSize) {
7492 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7493 "RegLocs should be for ByVal argument.");
7494
7495 const CCValAssign RL = ArgLocs[I++];
7496 HandleRegLoc(RL.getLocReg(), Offset);
7498 }
7499
7500 if (Offset != StackSize) {
7501 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7502 "Expected MemLoc for remaining bytes.");
7503 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7504 // Consume the MemLoc. The InVal has already been emitted, so nothing
7505 // more needs to be done.
7506 ++I;
7507 }
7508
7509 continue;
7510 }
7511
7512 if (VA.isRegLoc() && !VA.needsCustom()) {
7513 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7514 Register VReg =
7515 MF.addLiveIn(VA.getLocReg(),
7516 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7517 Subtarget.hasVSX()));
7518 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7519 if (ValVT.isScalarInteger() &&
7520 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7521 ArgValue =
7522 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7523 }
7524 InVals.push_back(ArgValue);
7525 continue;
7526 }
7527 if (VA.isMemLoc()) {
7528 HandleMemLoc();
7529 continue;
7530 }
7531 }
7532
7533 // On AIX a minimum of 8 words is saved to the parameter save area.
7534 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7535 // Area that is at least reserved in the caller of this function.
7536 unsigned CallerReservedArea = std::max<unsigned>(
7537 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7538
7539 // Set the size that is at least reserved in caller of this function. Tail
7540 // call optimized function's reserved stack space needs to be aligned so
7541 // that taking the difference between two stack areas will result in an
7542 // aligned stack.
7543 CallerReservedArea =
7544 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7545 FuncInfo->setMinReservedArea(CallerReservedArea);
7546
7547 if (isVarArg) {
7548 FuncInfo->setVarArgsFrameIndex(
7549 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7550 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7551
7552 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7553 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7554
7555 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7556 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7557 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7558
7559 // The fixed integer arguments of a variadic function are stored to the
7560 // VarArgsFrameIndex on the stack so that they may be loaded by
7561 // dereferencing the result of va_next.
7562 for (unsigned GPRIndex =
7563 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7564 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7565
7566 const Register VReg =
7567 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7568 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7569
7570 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7571 SDValue Store =
7572 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7573 MemOps.push_back(Store);
7574 // Increment the address for the next argument to store.
7575 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7576 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7577 }
7578 }
7579
7580 if (!MemOps.empty())
7581 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7582
7583 return Chain;
7584}
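// Note on the minimum reserved area computed above (illustration only): on
// 64-bit AIX this is at least 48 + 8*8 = 112 bytes (linkage area plus the
// 8-GPR parameter save area minimum), and on 32-bit at least 24 + 8*4 = 56
// bytes, before EnsureStackAlignment rounds the value up to the target
// stack alignment.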
7585
7586SDValue PPCTargetLowering::LowerCall_AIX(
7587 SDValue Chain, SDValue Callee, CallFlags CFlags,
7588 const SmallVectorImpl<ISD::OutputArg> &Outs,
7589 const SmallVectorImpl<SDValue> &OutVals,
7590 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7591 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7592 const CallBase *CB) const {
7593 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7594 // AIX ABI stack frame layout.
7595
7596 assert((CFlags.CallConv == CallingConv::C ||
7597 CFlags.CallConv == CallingConv::Cold ||
7598 CFlags.CallConv == CallingConv::Fast) &&
7599 "Unexpected calling convention!");
7600
7601 if (CFlags.IsPatchPoint)
7602 report_fatal_error("This call type is unimplemented on AIX.");
7603
7604 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7605
7606 MachineFunction &MF = DAG.getMachineFunction();
7607 SmallVector<CCValAssign, 16> ArgLocs;
7608 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7609 *DAG.getContext());
7610
7611 // Reserve space for the linkage save area (LSA) on the stack.
7612 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7613 // [SP][CR][LR][2 x reserved][TOC].
7614 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7615 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7616 const bool IsPPC64 = Subtarget.isPPC64();
7617 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7618 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7619 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7620 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7621
7622 // The prolog code of the callee may store up to 8 GPR argument registers to
7623 // the stack, allowing va_start to index over them in memory if the callee
7624 // is variadic.
7625 // Because we cannot tell if this is needed on the caller side, we have to
7626 // conservatively assume that it is needed. As such, make sure we have at
7627 // least enough stack space for the caller to store the 8 GPRs.
7628 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7629 const unsigned NumBytes = std::max<unsigned>(
7630 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7631
7632 // Adjust the stack pointer for the new arguments...
7633 // These operations are automatically eliminated by the prolog/epilog pass.
7634 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7635 SDValue CallSeqStart = Chain;
7636
7638 SmallVector<SDValue, 8> MemOpChains;
7639
7640 // Set up a copy of the stack pointer for loading and storing any
7641 // arguments that may not fit in the registers available for argument
7642 // passing.
7643 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7644 : DAG.getRegister(PPC::R1, MVT::i32);
7645
7646 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7647 const unsigned ValNo = ArgLocs[I].getValNo();
7648 SDValue Arg = OutVals[ValNo];
7649 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7650
7651 if (Flags.isByVal()) {
7652 const unsigned ByValSize = Flags.getByValSize();
7653
7654 // Nothing to do for zero-sized ByVals on the caller side.
7655 if (!ByValSize) {
7656 ++I;
7657 continue;
7658 }
7659
7660 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7661 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7662 (LoadOffset != 0)
7663 ? DAG.getObjectPtrOffset(
7664 dl, Arg, TypeSize::getFixed(LoadOffset))
7665 : Arg,
7666 MachinePointerInfo(), VT);
7667 };
7668
7669 unsigned LoadOffset = 0;
7670
7671 // Initialize registers, which are fully occupied by the by-val argument.
7672 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7673 SDValue Load = GetLoad(PtrVT, LoadOffset);
7674 MemOpChains.push_back(Load.getValue(1));
7675 LoadOffset += PtrByteSize;
7676 const CCValAssign &ByValVA = ArgLocs[I++];
7677 assert(ByValVA.getValNo() == ValNo &&
7678 "Unexpected location for pass-by-value argument.");
7679 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7680 }
7681
7682 if (LoadOffset == ByValSize)
7683 continue;
7684
7685 // There must be one more loc to handle the remainder.
7686 assert(ArgLocs[I].getValNo() == ValNo &&
7687 "Expected additional location for by-value argument.");
7688
7689 if (ArgLocs[I].isMemLoc()) {
7690 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7691 const CCValAssign &ByValVA = ArgLocs[I++];
7692 ISD::ArgFlagsTy MemcpyFlags = Flags;
7693 // Only memcpy the bytes that don't pass in register.
7694 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7695 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7696 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7697 dl, Arg, TypeSize::getFixed(LoadOffset))
7698 : Arg,
7699 DAG.getObjectPtrOffset(
7700 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7701 CallSeqStart, MemcpyFlags, DAG, dl);
7702 continue;
7703 }
7704
7705 // Initialize the final register residue.
7706 // Any residue that occupies the final by-val arg register must be
7707 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7708 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7709 // 2 and 1 byte loads.
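// Worked example (illustration only): on 64-bit, a 7-byte residue is
// assembled from an i32 load at offset 0, an i16 load at offset 4 and an
// i8 load at offset 6, shifted left by 32, 16 and 8 bits respectively and
// OR'd together, leaving the 7 bytes in the most-significant bytes of the
// GPR.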
7710 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7711 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7712 "Unexpected register residue for by-value argument.");
7713 SDValue ResidueVal;
7714 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7715 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7716 const MVT VT =
7717 N == 1 ? MVT::i8
7718 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7719 SDValue Load = GetLoad(VT, LoadOffset);
7720 MemOpChains.push_back(Load.getValue(1));
7721 LoadOffset += N;
7722 Bytes += N;
7723
7724 // By-val arguments are passed left-justified in register.
7725 // Every load here needs to be shifted, otherwise a full register load
7726 // should have been used.
7727 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7728 "Unexpected load emitted during handling of pass-by-value "
7729 "argument.");
7730 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7731 EVT ShiftAmountTy =
7732 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7733 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7734 SDValue ShiftedLoad =
7735 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7736 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7737 ShiftedLoad)
7738 : ShiftedLoad;
7739 }
7740
7741 const CCValAssign &ByValVA = ArgLocs[I++];
7742 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7743 continue;
7744 }
7745
7746 CCValAssign &VA = ArgLocs[I++];
7747 const MVT LocVT = VA.getLocVT();
7748 const MVT ValVT = VA.getValVT();
7749
7750 switch (VA.getLocInfo()) {
7751 default:
7752 report_fatal_error("Unexpected argument extension type.");
7753 case CCValAssign::Full:
7754 break;
7755 case CCValAssign::ZExt:
7756 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7757 break;
7758 case CCValAssign::SExt:
7759 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7760 break;
7761 }
7762
7763 if (VA.isRegLoc() && !VA.needsCustom()) {
7764 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7765 continue;
7766 }
7767
7768 // Vector arguments passed to VarArg functions need custom handling when
7769 // they are passed (at least partially) in GPRs.
7770 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7771 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7772 // Store value to its stack slot.
7773 SDValue PtrOff =
7774 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7775 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7776 SDValue Store =
7777 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7778 MemOpChains.push_back(Store);
7779 const unsigned OriginalValNo = VA.getValNo();
7780 // Then load the GPRs from the stack
7781 unsigned LoadOffset = 0;
7782 auto HandleCustomVecRegLoc = [&]() {
7783 assert(I != E && "Unexpected end of CCvalAssigns.");
7784 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7785 "Expected custom RegLoc.");
7786 CCValAssign RegVA = ArgLocs[I++];
7787 assert(RegVA.getValNo() == OriginalValNo &&
7788 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7789 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7790 DAG.getConstant(LoadOffset, dl, PtrVT));
7791 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7792 MemOpChains.push_back(Load.getValue(1));
7793 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7794 LoadOffset += PtrByteSize;
7795 };
7796
7797 // In 64-bit there will be exactly 2 custom RegLocs that follow. In
7798 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7799 // R10.
7800 HandleCustomVecRegLoc();
7801 HandleCustomVecRegLoc();
7802
7803 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7804 ArgLocs[I].getValNo() == OriginalValNo) {
7805 assert(!IsPPC64 &&
7806 "Only 2 custom RegLocs expected for 64-bit codegen.");
7807 HandleCustomVecRegLoc();
7808 HandleCustomVecRegLoc();
7809 }
7810
7811 continue;
7812 }
7813
7814 if (VA.isMemLoc()) {
7815 SDValue PtrOff =
7816 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7817 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7818 MemOpChains.push_back(
7819 DAG.getStore(Chain, dl, Arg, PtrOff,
7820 MachinePointerInfo(),
7821 Subtarget.getFrameLowering()->getStackAlign()));
7822
7823 continue;
7824 }
7825
7826 if (!ValVT.isFloatingPoint())
7827 report_fatal_error(
7828 "Unexpected register handling for calling convention.");
7829
7830 // Custom handling is used for GPR initializations for vararg float
7831 // arguments.
7832 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7833 LocVT.isInteger() &&
7834 "Custom register handling only expected for VarArg.");
7835
7836 SDValue ArgAsInt =
7837 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7838
7839 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7840 // f32 in 32-bit GPR
7841 // f64 in 64-bit GPR
7842 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7843 else if (Arg.getValueType().getFixedSizeInBits() <
7844 LocVT.getFixedSizeInBits())
7845 // f32 in 64-bit GPR.
7846 RegsToPass.push_back(std::make_pair(
7847 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7848 else {
7849 // f64 in two 32-bit GPRs
7850 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7851 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7852 "Unexpected custom register for argument!");
7853 CCValAssign &GPR1 = VA;
7854 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7855 DAG.getConstant(32, dl, MVT::i8));
7856 RegsToPass.push_back(std::make_pair(
7857 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7858
7859 if (I != E) {
7860 // If only 1 GPR was available, there will only be one custom GPR and
7861 // the argument will also pass in memory.
7862 CCValAssign &PeekArg = ArgLocs[I];
7863 if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7864 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7865 CCValAssign &GPR2 = ArgLocs[I++];
7866 RegsToPass.push_back(std::make_pair(
7867 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7868 }
7869 }
7870 }
7871 }
7872
7873 if (!MemOpChains.empty())
7874 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7875
7876 // For indirect calls, we need to save the TOC base to the stack for
7877 // restoration after the call.
7878 if (CFlags.IsIndirect) {
7879 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7880 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7881 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7882 const MVT PtrVT = Subtarget.getScalarIntVT();
7883 const unsigned TOCSaveOffset =
7884 Subtarget.getFrameLowering()->getTOCSaveOffset();
7885
7886 setUsesTOCBasePtr(DAG);
7887 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7888 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7889 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7890 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7891 Chain = DAG.getStore(
7892 Val.getValue(1), dl, Val, AddPtr,
7893 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7894 }
7895
7896 // Build a sequence of copy-to-reg nodes chained together with token chain
7897 // and flag operands which copy the outgoing args into the appropriate regs.
7898 SDValue InGlue;
7899 for (auto Reg : RegsToPass) {
7900 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7901 InGlue = Chain.getValue(1);
7902 }
7903
7904 const int SPDiff = 0;
7905 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7906 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7907}
7908
7909bool
7910PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7911 MachineFunction &MF, bool isVarArg,
7912 const SmallVectorImpl<ISD::OutputArg> &Outs,
7913 LLVMContext &Context,
7914 const Type *RetTy) const {
7915 SmallVector<CCValAssign, 16> RVLocs;
7916 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7917 return CCInfo.CheckReturn(
7918 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7919 ? RetCC_PPC_Cold
7920 : RetCC_PPC);
7921}
7922
7923SDValue
7924PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7925 bool isVarArg,
7926 const SmallVectorImpl<ISD::OutputArg> &Outs,
7927 const SmallVectorImpl<SDValue> &OutVals,
7928 const SDLoc &dl, SelectionDAG &DAG) const {
7929 SmallVector<CCValAssign, 16> RVLocs;
7930 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7931 *DAG.getContext());
7932 CCInfo.AnalyzeReturn(Outs,
7933 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7934 ? RetCC_PPC_Cold
7935 : RetCC_PPC);
7936
7937 SDValue Glue;
7938 SmallVector<SDValue, 4> RetOps(1, Chain);
7939
7940 // Copy the result values into the output registers.
7941 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7942 CCValAssign &VA = RVLocs[i];
7943 assert(VA.isRegLoc() && "Can only return in registers!");
7944
7945 SDValue Arg = OutVals[RealResIdx];
7946
7947 switch (VA.getLocInfo()) {
7948 default: llvm_unreachable("Unknown loc info!");
7949 case CCValAssign::Full: break;
7950 case CCValAssign::AExt:
7951 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7952 break;
7953 case CCValAssign::ZExt:
7954 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7955 break;
7956 case CCValAssign::SExt:
7957 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7958 break;
7959 }
7960 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7961 bool isLittleEndian = Subtarget.isLittleEndian();
7962 // Legalize ret f64 -> ret 2 x i32.
7963 SDValue SVal =
7964 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7965 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7966 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7967 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7968 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7969 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7970 Glue = Chain.getValue(1);
7971 VA = RVLocs[++i]; // skip ahead to next loc
7972 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7973 } else
7974 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7975 Glue = Chain.getValue(1);
7976 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7977 }
7978
7979 RetOps[0] = Chain; // Update chain.
7980
7981 // Add the glue if we have it.
7982 if (Glue.getNode())
7983 RetOps.push_back(Glue);
7984
7985 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7986}
7987
7988SDValue
7989PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7990 SelectionDAG &DAG) const {
7991 SDLoc dl(Op);
7992
7993 // Get the correct type for integers.
7994 EVT IntVT = Op.getValueType();
7995
7996 // Get the inputs.
7997 SDValue Chain = Op.getOperand(0);
7998 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7999 // Build a DYNAREAOFFSET node.
8000 SDValue Ops[2] = {Chain, FPSIdx};
8001 SDVTList VTs = DAG.getVTList(IntVT);
8002 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
8003}
8004
8005SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
8006 SelectionDAG &DAG) const {
8007 // When we pop the dynamic allocation we need to restore the SP link.
8008 SDLoc dl(Op);
8009
8010 // Get the correct type for pointers.
8011 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8012
8013 // Construct the stack pointer operand.
8014 bool isPPC64 = Subtarget.isPPC64();
8015 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
8016 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
8017
8018 // Get the operands for the STACKRESTORE.
8019 SDValue Chain = Op.getOperand(0);
8020 SDValue SaveSP = Op.getOperand(1);
8021
8022 // Load the old link SP.
8023 SDValue LoadLinkSP =
8024 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
8025
8026 // Restore the stack pointer.
8027 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
8028
8029 // Store the old link SP.
8030 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
8031}
8032
8033 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
8034 MachineFunction &MF = DAG.getMachineFunction();
8035 bool isPPC64 = Subtarget.isPPC64();
8036 EVT PtrVT = getPointerTy(MF.getDataLayout());
8037
8038 // Get the current return address save index. The users of this index will
8039 // be primarily DYNALLOC instructions.
8040 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8041 int RASI = FI->getReturnAddrSaveIndex();
8042
8043 // If the return address save index hasn't been defined yet.
8044 if (!RASI) {
8045 // Find out the fixed offset of the return address save area.
8046 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8047 // Allocate the frame index for the return address save area.
8048 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, LROffset, false);
8049 // Save the result.
8050 FI->setReturnAddrSaveIndex(RASI);
8051 }
8052 return DAG.getFrameIndex(RASI, PtrVT);
8053}
8054
8055SDValue
8056 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8057 MachineFunction &MF = DAG.getMachineFunction();
8058 bool isPPC64 = Subtarget.isPPC64();
8059 EVT PtrVT = getPointerTy(MF.getDataLayout());
8060
8061 // Get the current frame pointer save index. The users of this index will
8062 // be primarily DYNALLOC instructions.
8063 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8064 int FPSI = FI->getFramePointerSaveIndex();
8065
8066 // If the frame pointer save index hasn't been defined yet.
8067 if (!FPSI) {
8068 // Find out the fixed offset of the frame pointer save area.
8069 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8070 // Allocate the frame index for frame pointer save area.
8071 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8072 // Save the result.
8073 FI->setFramePointerSaveIndex(FPSI);
8074 }
8075 return DAG.getFrameIndex(FPSI, PtrVT);
8076}
8077
8078SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8079 SelectionDAG &DAG) const {
8080 MachineFunction &MF = DAG.getMachineFunction();
8081 // Get the inputs.
8082 SDValue Chain = Op.getOperand(0);
8083 SDValue Size = Op.getOperand(1);
8084 SDLoc dl(Op);
8085
8086 // Get the correct type for pointers.
8087 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8088 // Negate the size.
8089 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8090 DAG.getConstant(0, dl, PtrVT), Size);
8091 // Construct a node for the frame pointer save index.
8092 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8093 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8094 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8095 if (hasInlineStackProbe(MF))
8096 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8097 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8098}
8099
8100SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8101 SelectionDAG &DAG) const {
8102  MachineFunction &MF = DAG.getMachineFunction();
8103
8104 bool isPPC64 = Subtarget.isPPC64();
8105 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8106
8107 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8108 return DAG.getFrameIndex(FI, PtrVT);
8109}
8110
8111SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8112 SelectionDAG &DAG) const {
8113 SDLoc DL(Op);
8114 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8115 DAG.getVTList(MVT::i32, MVT::Other),
8116 Op.getOperand(0), Op.getOperand(1));
8117}
8118
8119SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8120 SelectionDAG &DAG) const {
8121 SDLoc DL(Op);
8122 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8123 Op.getOperand(0), Op.getOperand(1));
8124}
8125
8126SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8127 if (Op.getValueType().isVector())
8128 return LowerVectorLoad(Op, DAG);
8129
8130 assert(Op.getValueType() == MVT::i1 &&
8131 "Custom lowering only for i1 loads");
8132
8133 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8134
8135 SDLoc dl(Op);
8136 LoadSDNode *LD = cast<LoadSDNode>(Op);
8137
8138 SDValue Chain = LD->getChain();
8139 SDValue BasePtr = LD->getBasePtr();
8140 MachineMemOperand *MMO = LD->getMemOperand();
8141
8142 SDValue NewLD =
8143 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8144 BasePtr, MVT::i8, MMO);
8145 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8146
8147 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8148 return DAG.getMergeValues(Ops, dl);
8149}
8150
8151SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8152 if (Op.getOperand(1).getValueType().isVector())
8153 return LowerVectorStore(Op, DAG);
8154
8155 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8156 "Custom lowering only for i1 stores");
8157
8158 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8159
8160 SDLoc dl(Op);
8161 StoreSDNode *ST = cast<StoreSDNode>(Op);
8162
8163 SDValue Chain = ST->getChain();
8164 SDValue BasePtr = ST->getBasePtr();
8165 SDValue Value = ST->getValue();
8166 MachineMemOperand *MMO = ST->getMemOperand();
8167
8168  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8169                      Value);
8170 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8171}
8172
8173// FIXME: Remove this once the ANDI glue bug is fixed:
8174SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8175 assert(Op.getValueType() == MVT::i1 &&
8176 "Custom lowering only for i1 results");
8177
8178 SDLoc DL(Op);
8179 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8180}
8181
8182SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8183 SelectionDAG &DAG) const {
8184
8185 // Implements a vector truncate that fits in a vector register as a shuffle.
8186 // We want to legalize vector truncates down to where the source fits in
8187 // a vector register (and target is therefore smaller than vector register
8188 // size). At that point legalization will try to custom lower the sub-legal
8189 // result and get here - where we can contain the truncate as a single target
8190 // operation.
8191
8192 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8193 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8194 //
8195 // We will implement it for big-endian ordering as this (where x denotes
8196 // undefined):
8197 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8198 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8199 //
8200 // The same operation in little-endian ordering will be:
8201 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8202 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8203
8204 EVT TrgVT = Op.getValueType();
8205 assert(TrgVT.isVector() && "Vector type expected.");
8206 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8207 EVT EltVT = TrgVT.getVectorElementType();
8208 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8209 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8210 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8211 return SDValue();
8212
8213 SDValue N1 = Op.getOperand(0);
8214 EVT SrcVT = N1.getValueType();
8215 unsigned SrcSize = SrcVT.getSizeInBits();
8216 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8217 !llvm::has_single_bit<uint32_t>(
8218          SrcVT.getVectorElementType().getSizeInBits()))
8219    return SDValue();
8220 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8221 return SDValue();
8222
8223 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8224 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8225
8226 SDLoc DL(Op);
8227 SDValue Op1, Op2;
8228 if (SrcSize == 256) {
8229 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8230 EVT SplitVT =
8231        EVT::getVectorVT(*DAG.getContext(), EltVT, SrcVT.getVectorNumElements() / 2);
8232    unsigned SplitNumElts = SplitVT.getVectorNumElements();
8233 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8234 DAG.getConstant(0, DL, VecIdxTy));
8235 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8236 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8237 }
8238 else {
8239 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8240 Op2 = DAG.getUNDEF(WideVT);
8241 }
8242
8243 // First list the elements we want to keep.
8244 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8245 SmallVector<int, 16> ShuffV;
8246 if (Subtarget.isLittleEndian())
8247 for (unsigned i = 0; i < TrgNumElts; ++i)
8248 ShuffV.push_back(i * SizeMult);
8249 else
8250 for (unsigned i = 1; i <= TrgNumElts; ++i)
8251 ShuffV.push_back(i * SizeMult - 1);
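  // For the <2 x i16> -> <2 x i8> example above, SizeMult is 2, so this keeps
  // bytes <1, 3> of the widened vector on big-endian and bytes <0, 2> on
  // little-endian, i.e. exactly the LSBs of the two source elements.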
8252
8253 // Populate the remaining elements with undefs.
8254 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8255 // ShuffV.push_back(i + WideNumElts);
8256 ShuffV.push_back(WideNumElts + 1);
8257
8258 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8259 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8260 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8261}
8262
8263/// LowerSELECT_CC - Lower floating point select_cc's into an fsel instruction
8264/// when possible.
8265SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8266 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8267 EVT ResVT = Op.getValueType();
8268 EVT CmpVT = Op.getOperand(0).getValueType();
8269 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8270 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8271 SDLoc dl(Op);
8272
8273  // Without power9-vector, we don't have a native instruction for f128
8274  // comparison. The following transformation to a libcall is needed for setcc:
8275 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8276 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8277 SDValue Z = DAG.getSetCC(
8278 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8279 LHS, RHS, CC);
8280 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8281 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8282 }
8283
8284 // Not FP, or using SPE? Not a fsel.
8285 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8286 Subtarget.hasSPE())
8287 return Op;
8288
8289 SDNodeFlags Flags = Op.getNode()->getFlags();
8290
8291 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8292 // presence of infinities.
8293 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8294 switch (CC) {
8295 default:
8296 break;
8297 case ISD::SETOGT:
8298 case ISD::SETGT:
8299 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8300 case ISD::SETOLT:
8301 case ISD::SETLT:
8302 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8303 }
8304 }
8305
8306 // We might be able to do better than this under some circumstances, but in
8307 // general, fsel-based lowering of select is a finite-math-only optimization.
8308 // For more information, see section F.3 of the 2.06 ISA specification.
8309 // With ISA 3.0
8310 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8311 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8312 ResVT == MVT::f128)
8313 return Op;
8314
8315 // If the RHS of the comparison is a 0.0, we don't need to do the
8316 // subtraction at all.
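  // For example, "select_cc lhs, +0.0, tv, fv, setge" maps directly onto a
  // single fsel, since fsel picks its second operand when the first operand
  // compares greater than or equal to zero and its third operand otherwise.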
8317 SDValue Sel1;
8318 if (isFloatingPointZero(RHS))
8319 switch (CC) {
8320 default: break; // SETUO etc aren't handled by fsel.
8321 case ISD::SETNE:
8322 std::swap(TV, FV);
8323 [[fallthrough]];
8324 case ISD::SETEQ:
8325 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8326 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8327 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8328 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8329 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8330 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8331 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8332 case ISD::SETULT:
8333 case ISD::SETLT:
8334 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8335 [[fallthrough]];
8336 case ISD::SETOGE:
8337 case ISD::SETGE:
8338 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8339 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8340 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8341 case ISD::SETUGT:
8342 case ISD::SETGT:
8343 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8344 [[fallthrough]];
8345 case ISD::SETOLE:
8346 case ISD::SETLE:
8347 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8348 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8349 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8350 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8351 }
8352
8353 SDValue Cmp;
8354 switch (CC) {
8355 default: break; // SETUO etc aren't handled by fsel.
8356 case ISD::SETNE:
8357 std::swap(TV, FV);
8358 [[fallthrough]];
8359 case ISD::SETEQ:
8360 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8361 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8362 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8363 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8364 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8365 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8366 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8367 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8368 case ISD::SETULT:
8369 case ISD::SETLT:
8370 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8371 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8372 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8373 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8374 case ISD::SETOGE:
8375 case ISD::SETGE:
8376 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8377 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8378 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8379 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8380 case ISD::SETUGT:
8381 case ISD::SETGT:
8382 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8383 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8384 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8385 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8386 case ISD::SETOLE:
8387 case ISD::SETLE:
8388 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8389 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8390 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8391 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8392 }
8393 return Op;
8394}
8395
8396static unsigned getPPCStrictOpcode(unsigned Opc) {
8397 switch (Opc) {
8398 default:
8399 llvm_unreachable("No strict version of this opcode!");
8400 case PPCISD::FCTIDZ:
8401 return PPCISD::STRICT_FCTIDZ;
8402 case PPCISD::FCTIWZ:
8403 return PPCISD::STRICT_FCTIWZ;
8404 case PPCISD::FCTIDUZ:
8405    return PPCISD::STRICT_FCTIDUZ;
8406  case PPCISD::FCTIWUZ:
8407    return PPCISD::STRICT_FCTIWUZ;
8408 case PPCISD::FCFID:
8409 return PPCISD::STRICT_FCFID;
8410 case PPCISD::FCFIDU:
8411 return PPCISD::STRICT_FCFIDU;
8412 case PPCISD::FCFIDS:
8413 return PPCISD::STRICT_FCFIDS;
8414 case PPCISD::FCFIDUS:
8415    return PPCISD::STRICT_FCFIDUS;
8416  }
8417}
8418
8419static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8420                              const PPCSubtarget &Subtarget) {
8421 SDLoc dl(Op);
8422 bool IsStrict = Op->isStrictFPOpcode();
8423 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8424 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8425
8426 // TODO: Any other flags to propagate?
8427 SDNodeFlags Flags;
8428 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8429
8430 // For strict nodes, source is the second operand.
8431 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8432 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8433 MVT DestTy = Op.getSimpleValueType();
8434 assert(Src.getValueType().isFloatingPoint() &&
8435 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8436 DestTy == MVT::i64) &&
8437 "Invalid FP_TO_INT types");
8438 if (Src.getValueType() == MVT::f32) {
8439 if (IsStrict) {
8440 Src =
8441          DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8442                      DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8443 Chain = Src.getValue(1);
8444 } else
8445 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8446 }
8447 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8448 DestTy = Subtarget.getScalarIntVT();
8449 unsigned Opc = ISD::DELETED_NODE;
8450 switch (DestTy.SimpleTy) {
8451 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8452 case MVT::i32:
8453 Opc = IsSigned ? PPCISD::FCTIWZ
8454 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8455 break;
8456 case MVT::i64:
8457 assert((IsSigned || Subtarget.hasFPCVT()) &&
8458 "i64 FP_TO_UINT is supported only with FPCVT");
8459 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8460 }
8461 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8462 SDValue Conv;
8463 if (IsStrict) {
8464    Opc = getPPCStrictOpcode(Opc);
8465    Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8466 Flags);
8467 } else {
8468 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8469 }
8470 return Conv;
8471}
8472
8473void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8474 SelectionDAG &DAG,
8475 const SDLoc &dl) const {
8476 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8477 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8478 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8479 bool IsStrict = Op->isStrictFPOpcode();
8480
8481 // Convert the FP value to an int value through memory.
8482 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8483 (IsSigned || Subtarget.hasFPCVT());
8484 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8485 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8486 MachinePointerInfo MPI =
8487      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8488
8489 // Emit a store to the stack slot.
8490 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8491 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8492 if (i32Stack) {
8493    MachineFunction &MF = DAG.getMachineFunction();
8494    Alignment = Align(4);
8495 MachineMemOperand *MMO =
8496 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8497 SDValue Ops[] = { Chain, Tmp, FIPtr };
8498 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8499 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8500 } else
8501 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8502
8503 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8504 // add in a bias on big endian.
8505 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8506 !Subtarget.isLittleEndian()) {
8507 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8508 DAG.getConstant(4, dl, FIPtr.getValueType()));
8509 MPI = MPI.getWithOffset(4);
8510 }
8511
8512 RLI.Chain = Chain;
8513 RLI.Ptr = FIPtr;
8514 RLI.MPI = MPI;
8515 RLI.Alignment = Alignment;
8516}
8517
8518/// Custom lowers floating point to integer conversions to use
8519/// the direct move instructions available in ISA 2.07 to avoid the
8520/// need for load/store combinations.
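/// (The conversion happens in a VSX register and the result is moved to a GPR
/// with a direct-move instruction such as mfvsrd, instead of being stored to a
/// stack slot and reloaded.)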
8521SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8522 SelectionDAG &DAG,
8523 const SDLoc &dl) const {
8524 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8525 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8526 if (Op->isStrictFPOpcode())
8527 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8528 else
8529 return Mov;
8530}
8531
8532SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8533 const SDLoc &dl) const {
8534 bool IsStrict = Op->isStrictFPOpcode();
8535 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8536 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8537 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8538 EVT SrcVT = Src.getValueType();
8539 EVT DstVT = Op.getValueType();
8540
8541 // FP to INT conversions are legal for f128.
8542 if (SrcVT == MVT::f128)
8543 return Subtarget.hasP9Vector() ? Op : SDValue();
8544
8545 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8546 // PPC (the libcall is not available).
8547 if (SrcVT == MVT::ppcf128) {
8548 if (DstVT == MVT::i32) {
8549 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8550 // set other fast-math flags to FP operations in both strict and
8551 // non-strict cases. (FP_TO_SINT, FSUB)
8552      SDNodeFlags Flags;
8553      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8554
8555 if (IsSigned) {
8556 SDValue Lo, Hi;
8557 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8558
8559 // Add the two halves of the long double in round-to-zero mode, and use
8560 // a smaller FP_TO_SINT.
8561 if (IsStrict) {
8562          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8563                                    DAG.getVTList(MVT::f64, MVT::Other),
8564 {Op.getOperand(0), Lo, Hi}, Flags);
8565 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8566 DAG.getVTList(MVT::i32, MVT::Other),
8567 {Res.getValue(1), Res}, Flags);
8568 } else {
8569 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8570 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8571 }
8572 } else {
8573 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8574 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8575 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8576 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8577 if (IsStrict) {
8578 // Sel = Src < 0x80000000
8579 // FltOfs = select Sel, 0.0, 0x80000000
8580 // IntOfs = select Sel, 0, 0x80000000
8581 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8582 SDValue Chain = Op.getOperand(0);
8583 EVT SetCCVT =
8584 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8585 EVT DstSetCCVT =
8586 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8587 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8588 Chain, true);
8589 Chain = Sel.getValue(1);
8590
8591 SDValue FltOfs = DAG.getSelect(
8592 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8593 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8594
8595 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8596 DAG.getVTList(SrcVT, MVT::Other),
8597 {Chain, Src, FltOfs}, Flags);
8598 Chain = Val.getValue(1);
8599 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8600 DAG.getVTList(DstVT, MVT::Other),
8601 {Chain, Val}, Flags);
8602 Chain = SInt.getValue(1);
8603 SDValue IntOfs = DAG.getSelect(
8604 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8605 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8606 return DAG.getMergeValues({Result, Chain}, dl);
8607 } else {
8608 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8609 // FIXME: generated code sucks.
8610 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8611 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8612 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8613 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8614 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8615 }
8616 }
8617 }
8618
8619 return SDValue();
8620 }
8621
8622 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8623 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8624
8625 ReuseLoadInfo RLI;
8626 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8627
8628 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8629 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8630}
8631
8632// We're trying to insert a regular store, S, and then a load, L. If the
8633// incoming value, O, is a load, we might just be able to have our load use the
8634// address used by O. However, we don't know if anything else will store to
8635// that address before we can load from it. To prevent this situation, we need
8636// to insert our load, L, into the chain as a peer of O. To do this, we give L
8637// the same chain operand as O, we create a token factor from the chain results
8638// of O and L, and we replace all uses of O's chain result with that token
8639// factor (this last part is handled by makeEquivalentMemoryOrdering).
8640bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8641 ReuseLoadInfo &RLI,
8642 SelectionDAG &DAG,
8643 ISD::LoadExtType ET) const {
8644 // Conservatively skip reusing for constrained FP nodes.
8645 if (Op->isStrictFPOpcode())
8646 return false;
8647
8648 SDLoc dl(Op);
8649 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8650 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8651 if (ET == ISD::NON_EXTLOAD &&
8652 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8653 isOperationLegalOrCustom(Op.getOpcode(),
8654 Op.getOperand(0).getValueType())) {
8655
8656 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8657 return true;
8658 }
8659
8660 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8661 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8662 LD->isNonTemporal())
8663 return false;
8664 if (LD->getMemoryVT() != MemVT)
8665 return false;
8666
8667  // If the result of the load is an illegal type, then we can't build a
8668  // valid chain for reuse since the legalised loads and the token factor node
8669  // that ties the legalised loads together use a different output chain than
8670  // the illegal load.
8671 if (!isTypeLegal(LD->getValueType(0)))
8672 return false;
8673
8674 RLI.Ptr = LD->getBasePtr();
8675 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8676 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8677 "Non-pre-inc AM on PPC?");
8678 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8679 LD->getOffset());
8680 }
8681
8682 RLI.Chain = LD->getChain();
8683 RLI.MPI = LD->getPointerInfo();
8684 RLI.IsDereferenceable = LD->isDereferenceable();
8685 RLI.IsInvariant = LD->isInvariant();
8686 RLI.Alignment = LD->getAlign();
8687 RLI.AAInfo = LD->getAAInfo();
8688 RLI.Ranges = LD->getRanges();
8689
8690 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8691 return true;
8692}
8693
8694/// Analyze the profitability of a direct move:
8695/// prefer a float load to an int load plus a direct move
8696/// when there is no integer use of the int load.
8697bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8698 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8699 if (Origin->getOpcode() != ISD::LOAD)
8700 return true;
8701
8702  // If there is no LXSIBZX/LXSIHZX (as on Power8),
8703  // prefer a direct move if the memory access size is 1 or 2 bytes.
8704 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8705 if (!Subtarget.hasP9Vector() &&
8706 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8707 return true;
8708
8709 for (SDUse &Use : Origin->uses()) {
8710
8711 // Only look at the users of the loaded value.
8712 if (Use.getResNo() != 0)
8713 continue;
8714
8715 SDNode *User = Use.getUser();
8716 if (User->getOpcode() != ISD::SINT_TO_FP &&
8717 User->getOpcode() != ISD::UINT_TO_FP &&
8718 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8719 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8720 return true;
8721 }
8722
8723 return false;
8724}
8725
8726static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8727                              const PPCSubtarget &Subtarget,
8728 SDValue Chain = SDValue()) {
8729 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8730 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8731 SDLoc dl(Op);
8732
8733 // TODO: Any other flags to propagate?
8734 SDNodeFlags Flags;
8735 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8736
8737 // If we have FCFIDS, then use it when converting to single-precision.
8738 // Otherwise, convert to double-precision and then round.
8739 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8740 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8741 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8742 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8743 if (Op->isStrictFPOpcode()) {
8744 if (!Chain)
8745 Chain = Op.getOperand(0);
8746 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8747 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8748 } else
8749 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8750}
8751
8752/// Custom lowers integer to floating point conversions to use
8753/// the direct move instructions available in ISA 2.07 to avoid the
8754/// need for load/store combinations.
8755SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8756 SelectionDAG &DAG,
8757 const SDLoc &dl) const {
8758 assert((Op.getValueType() == MVT::f32 ||
8759 Op.getValueType() == MVT::f64) &&
8760 "Invalid floating point type as target of conversion");
8761 assert(Subtarget.hasFPCVT() &&
8762 "Int to FP conversions with direct moves require FPCVT");
8763 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8764 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8765 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8766 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8767 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8768 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8769 return convertIntToFP(Op, Mov, DAG, Subtarget);
8770}
8771
8772static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8773
8774 EVT VecVT = Vec.getValueType();
8775 assert(VecVT.isVector() && "Expected a vector type.");
8776 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8777
8778 EVT EltVT = VecVT.getVectorElementType();
8779 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8780 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8781
8782 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8783 SmallVector<SDValue, 16> Ops(NumConcat);
8784 Ops[0] = Vec;
8785 SDValue UndefVec = DAG.getUNDEF(VecVT);
8786 for (unsigned i = 1; i < NumConcat; ++i)
8787 Ops[i] = UndefVec;
8788
8789 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8790}
8791
8792SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8793 const SDLoc &dl) const {
8794 bool IsStrict = Op->isStrictFPOpcode();
8795 unsigned Opc = Op.getOpcode();
8796 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8799 "Unexpected conversion type");
8800 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8801 "Supports conversions to v2f64/v4f32 only.");
8802
8803 // TODO: Any other flags to propagate?
8804  SDNodeFlags Flags;
8805  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8806
8807 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8808 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8809
8810 SDValue Wide = widenVec(DAG, Src, dl);
8811 EVT WideVT = Wide.getValueType();
8812 unsigned WideNumElts = WideVT.getVectorNumElements();
8813 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8814
8815 SmallVector<int, 16> ShuffV;
8816 for (unsigned i = 0; i < WideNumElts; ++i)
8817 ShuffV.push_back(i + WideNumElts);
8818
8819 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8820 int SaveElts = FourEltRes ? 4 : 2;
8821 if (Subtarget.isLittleEndian())
8822 for (int i = 0; i < SaveElts; i++)
8823 ShuffV[i * Stride] = i;
8824 else
8825 for (int i = 1; i <= SaveElts; i++)
8826 ShuffV[i * Stride - 1] = i - 1;
8827
8828 SDValue ShuffleSrc2 =
8829 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8830 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8831
8832 SDValue Extend;
8833 if (SignedConv) {
8834 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8835 EVT ExtVT = Src.getValueType();
8836 if (Subtarget.hasP9Altivec())
8837 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8838 IntermediateVT.getVectorNumElements());
8839
8840 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8841 DAG.getValueType(ExtVT));
8842 } else
8843 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8844
8845 if (IsStrict)
8846 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8847 {Op.getOperand(0), Extend}, Flags);
8848
8849 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8850}
8851
8852SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8853 SelectionDAG &DAG) const {
8854 SDLoc dl(Op);
8855 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8856 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8857 bool IsStrict = Op->isStrictFPOpcode();
8858 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8859 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8860
8861 // TODO: Any other flags to propagate?
8862  SDNodeFlags Flags;
8863  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8864
8865 EVT InVT = Src.getValueType();
8866 EVT OutVT = Op.getValueType();
8867 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8868 isOperationCustom(Op.getOpcode(), InVT))
8869 return LowerINT_TO_FPVector(Op, DAG, dl);
8870
8871 // Conversions to f128 are legal.
8872 if (Op.getValueType() == MVT::f128)
8873 return Subtarget.hasP9Vector() ? Op : SDValue();
8874
8875 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8876 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8877 return SDValue();
8878
8879 if (Src.getValueType() == MVT::i1) {
8880 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8881 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8882 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8883 if (IsStrict)
8884 return DAG.getMergeValues({Sel, Chain}, dl);
8885 else
8886 return Sel;
8887 }
8888
8889 // If we have direct moves, we can do all the conversion, skip the store/load
8890 // however, without FPCVT we can't do most conversions.
8891 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8892 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8893 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8894
8895 assert((IsSigned || Subtarget.hasFPCVT()) &&
8896 "UINT_TO_FP is supported only with FPCVT");
8897
8898 if (Src.getValueType() == MVT::i64) {
8899 SDValue SINT = Src;
8900 // When converting to single-precision, we actually need to convert
8901 // to double-precision first and then round to single-precision.
8902 // To avoid double-rounding effects during that operation, we have
8903 // to prepare the input operand. Bits that might be truncated when
8904 // converting to double-precision are replaced by a bit that won't
8905 // be lost at this stage, but is below the single-precision rounding
8906 // position.
8907 //
8908 // However, if -enable-unsafe-fp-math is in effect, accept double
8909 // rounding to avoid the extra overhead.
8910 if (Op.getValueType() == MVT::f32 &&
8911 !Subtarget.hasFPCVT() &&
8912        !DAG.getTarget().Options.UnsafeFPMath) {
8913
8914 // Twiddle input to make sure the low 11 bits are zero. (If this
8915 // is the case, we are guaranteed the value will fit into the 53 bit
8916 // mantissa of an IEEE double-precision value without rounding.)
8917 // If any of those low 11 bits were not zero originally, make sure
8918 // bit 12 (value 2048) is set instead, so that the final rounding
8919 // to single-precision gets the correct result.
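      // Concretely: (SINT & 2047) + 2047 carries into the 2048 bit exactly
      // when any of the low 11 bits of SINT are nonzero; OR-ing that back into
      // SINT and then clearing the low 11 bits produces the rounded value
      // described above.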
8920 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8921 SINT, DAG.getConstant(2047, dl, MVT::i64));
8922 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8923 Round, DAG.getConstant(2047, dl, MVT::i64));
8924 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8925 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8926 DAG.getSignedConstant(-2048, dl, MVT::i64));
8927
8928 // However, we cannot use that value unconditionally: if the magnitude
8929 // of the input value is small, the bit-twiddling we did above might
8930 // end up visibly changing the output. Fortunately, in that case, we
8931 // don't need to twiddle bits since the original input will convert
8932 // exactly to double-precision floating-point already. Therefore,
8933 // construct a conditional to use the original value if the top 11
8934 // bits are all sign-bit copies, and use the rounded value computed
8935 // above otherwise.
8936 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8937 SINT, DAG.getConstant(53, dl, MVT::i32));
8938 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8939 Cond, DAG.getConstant(1, dl, MVT::i64));
8940 Cond = DAG.getSetCC(
8941 dl,
8942 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8943 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8944
8945 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8946 }
8947
8948 ReuseLoadInfo RLI;
8949 SDValue Bits;
8950
8951    MachineFunction &MF = DAG.getMachineFunction();
8952    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8953 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8954 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8955 if (RLI.ResChain)
8956 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8957 } else if (Subtarget.hasLFIWAX() &&
8958 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8959 MachineMemOperand *MMO =
8960        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8961                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8962 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8963      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8964                                     DAG.getVTList(MVT::f64, MVT::Other),
8965 Ops, MVT::i32, MMO);
8966 if (RLI.ResChain)
8967 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8968 } else if (Subtarget.hasFPCVT() &&
8969 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8970 MachineMemOperand *MMO =
8971        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8972                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8973 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8974      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8975                                     DAG.getVTList(MVT::f64, MVT::Other),
8976 Ops, MVT::i32, MMO);
8977 if (RLI.ResChain)
8978 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8979 } else if (((Subtarget.hasLFIWAX() &&
8980 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8981 (Subtarget.hasFPCVT() &&
8982 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8983 SINT.getOperand(0).getValueType() == MVT::i32) {
8984 MachineFrameInfo &MFI = MF.getFrameInfo();
8985 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8986
8987 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8988 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8989
8990 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8991                                 MachinePointerInfo::getFixedStack(
8992                                     DAG.getMachineFunction(), FrameIdx));
8993 Chain = Store;
8994
8995 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8996 "Expected an i32 store");
8997
8998 RLI.Ptr = FIdx;
8999 RLI.Chain = Chain;
9000 RLI.MPI =
9001        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9002    RLI.Alignment = Align(4);
9003
9004 MachineMemOperand *MMO =
9005        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9006                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9007 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9008      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
9009                                         PPCISD::LFIWZX : PPCISD::LFIWAX,
9010                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
9011 Ops, MVT::i32, MMO);
9012 Chain = Bits.getValue(1);
9013 } else
9014 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
9015
9016 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
9017 if (IsStrict)
9018 Chain = FP.getValue(1);
9019
9020 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9021 if (IsStrict)
9022 FP = DAG.getNode(
9023 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9024 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
9025 Flags);
9026 else
9027 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9028 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9029 }
9030 return FP;
9031 }
9032
9033 assert(Src.getValueType() == MVT::i32 &&
9034 "Unhandled INT_TO_FP type in custom expander!");
9035 // Since we only generate this in 64-bit mode, we can take advantage of
9036 // 64-bit registers. In particular, sign extend the input value into the
9037 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
9038 // then lfd it and fcfid it.
9039  MachineFunction &MF = DAG.getMachineFunction();
9040  MachineFrameInfo &MFI = MF.getFrameInfo();
9041 EVT PtrVT = getPointerTy(MF.getDataLayout());
9042
9043 SDValue Ld;
9044 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9045 ReuseLoadInfo RLI;
9046 bool ReusingLoad;
9047 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9048 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9049 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9050
9051 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9052                                   MachinePointerInfo::getFixedStack(
9053                                       DAG.getMachineFunction(), FrameIdx));
9054 Chain = Store;
9055
9056 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9057 "Expected an i32 store");
9058
9059 RLI.Ptr = FIdx;
9060 RLI.Chain = Chain;
9061 RLI.MPI =
9062          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9063      RLI.Alignment = Align(4);
9064 }
9065
9066 MachineMemOperand *MMO =
9067        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9068                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9069 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9070 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9071 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9072 MVT::i32, MMO);
9073 Chain = Ld.getValue(1);
9074 if (ReusingLoad && RLI.ResChain) {
9075 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
9076 }
9077 } else {
9078 assert(Subtarget.isPPC64() &&
9079 "i32->FP without LFIWAX supported only on PPC64");
9080
9081 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9082 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9083
9084 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9085
9086 // STD the extended value into the stack slot.
9087 SDValue Store = DAG.getStore(
9088 Chain, dl, Ext64, FIdx,
9089        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9090    Chain = Store;
9091
9092 // Load the value as a double.
9093 Ld = DAG.getLoad(
9094 MVT::f64, dl, Chain, FIdx,
9095        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9096    Chain = Ld.getValue(1);
9097 }
9098
9099 // FCFID it and return it.
9100 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9101 if (IsStrict)
9102 Chain = FP.getValue(1);
9103 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9104 if (IsStrict)
9105 FP = DAG.getNode(
9106 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9107 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9108 else
9109 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9110 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9111 }
9112 return FP;
9113}
9114
9115SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9116 SelectionDAG &DAG) const {
9117 SDLoc Dl(Op);
9118  MachineFunction &MF = DAG.getMachineFunction();
9119  EVT PtrVT = getPointerTy(MF.getDataLayout());
9120 SDValue Chain = Op.getOperand(0);
9121
9122  // If the requested mode is constant, just use the simpler mtfsb/mffscrni.
9123 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9124 uint64_t Mode = CVal->getZExtValue();
9125 assert(Mode < 4 && "Unsupported rounding mode!");
9126 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9127 if (Subtarget.isISA3_0())
9128 return SDValue(
9129 DAG.getMachineNode(
9130 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9131 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9132 1);
9133 SDNode *SetHi = DAG.getMachineNode(
9134 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9135 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9136 SDNode *SetLo = DAG.getMachineNode(
9137 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9138 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9139 return SDValue(SetLo, 0);
9140 }
9141
9142 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
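  // This maps the LLVM rounding-mode values 0 (toward zero), 1 (nearest),
  // 2 (+inf) and 3 (-inf) to the FPSCR RN encodings 1, 0, 2 and 3 respectively
  // (see the encoding table in LowerGET_ROUNDING below).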
9143 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9144 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9145 DAG.getConstant(3, Dl, MVT::i32));
9146 SDValue DstFlag = DAG.getNode(
9147 ISD::XOR, Dl, MVT::i32, SrcFlag,
9148 DAG.getNode(ISD::AND, Dl, MVT::i32,
9149 DAG.getNOT(Dl,
9150 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9151 MVT::i32),
9152 One));
9153  // For Power9, there's a faster mffscrn, so we don't need to read the FPSCR.
9154 SDValue MFFS;
9155 if (!Subtarget.isISA3_0()) {
9156 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9157 Chain = MFFS.getValue(1);
9158 }
9159 SDValue NewFPSCR;
9160 if (Subtarget.isPPC64()) {
9161 if (Subtarget.isISA3_0()) {
9162 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9163 } else {
9164 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9165 SDNode *InsertRN = DAG.getMachineNode(
9166 PPC::RLDIMI, Dl, MVT::i64,
9167 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9168 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9169 DAG.getTargetConstant(0, Dl, MVT::i32),
9170 DAG.getTargetConstant(62, Dl, MVT::i32)});
9171 NewFPSCR = SDValue(InsertRN, 0);
9172 }
9173 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9174 } else {
9175 // In 32-bit mode, store f64, load and update the lower half.
9176 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9177 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9178 SDValue Addr = Subtarget.isLittleEndian()
9179 ? StackSlot
9180 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9181 DAG.getConstant(4, Dl, PtrVT));
9182 if (Subtarget.isISA3_0()) {
9183 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9184 } else {
9185 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9186 SDValue Tmp =
9187 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9188 Chain = Tmp.getValue(1);
9189 Tmp = SDValue(DAG.getMachineNode(
9190 PPC::RLWIMI, Dl, MVT::i32,
9191 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9192 DAG.getTargetConstant(30, Dl, MVT::i32),
9193 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9194 0);
9195 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9196 }
9197 NewFPSCR =
9198 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9199 Chain = NewFPSCR.getValue(1);
9200 }
9201 if (Subtarget.isISA3_0())
9202 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9203 {NewFPSCR, Chain}),
9204 1);
9205 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9206 SDNode *MTFSF = DAG.getMachineNode(
9207 PPC::MTFSF, Dl, MVT::Other,
9208 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9209 return SDValue(MTFSF, 0);
9210}
9211
9212SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9213 SelectionDAG &DAG) const {
9214 SDLoc dl(Op);
9215 /*
9216  The rounding mode is in bits 30:31 of the FPSCR, and has the following
9217 settings:
9218 00 Round to nearest
9219 01 Round to 0
9220 10 Round to +inf
9221 11 Round to -inf
9222
9223 GET_ROUNDING, on the other hand, expects the following:
9224 -1 Undefined
9225 0 Round to 0
9226 1 Round to nearest
9227 2 Round to +inf
9228 3 Round to -inf
9229
9230 To perform the conversion, we do:
9231 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9232 */
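  // For example, FPSCR RN = 01 (round to 0): ((1 & 0x3) ^ ((~1 & 0x3) >> 1))
  // = 1 ^ 1 = 0, which is the GET_ROUNDING value for "Round to 0".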
9233
9234  MachineFunction &MF = DAG.getMachineFunction();
9235  EVT VT = Op.getValueType();
9236 EVT PtrVT = getPointerTy(MF.getDataLayout());
9237
9238 // Save FP Control Word to register
9239 SDValue Chain = Op.getOperand(0);
9240 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9241 Chain = MFFS.getValue(1);
9242
9243 SDValue CWD;
9244 if (isTypeLegal(MVT::i64)) {
9245 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9246 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9247 } else {
9248 // Save FP register to stack slot
9249 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9250 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9251 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9252
9253 // Load FP Control Word from low 32 bits of stack slot.
9255 "Stack slot adjustment is valid only on big endian subtargets!");
9256 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9257 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9258 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9259 Chain = CWD.getValue(1);
9260 }
9261
9262 // Transform as necessary
9263 SDValue CWD1 =
9264 DAG.getNode(ISD::AND, dl, MVT::i32,
9265 CWD, DAG.getConstant(3, dl, MVT::i32));
9266 SDValue CWD2 =
9267 DAG.getNode(ISD::SRL, dl, MVT::i32,
9268 DAG.getNode(ISD::AND, dl, MVT::i32,
9269 DAG.getNode(ISD::XOR, dl, MVT::i32,
9270 CWD, DAG.getConstant(3, dl, MVT::i32)),
9271 DAG.getConstant(3, dl, MVT::i32)),
9272 DAG.getConstant(1, dl, MVT::i32));
9273
9274 SDValue RetVal =
9275 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9276
9277 RetVal =
9278      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9279                  dl, VT, RetVal);
9280
9281 return DAG.getMergeValues({RetVal, Chain}, dl);
9282}
9283
9284SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9285 EVT VT = Op.getValueType();
9286  unsigned BitWidth = VT.getSizeInBits();
9287  SDLoc dl(Op);
9288 assert(Op.getNumOperands() == 3 &&
9289 VT == Op.getOperand(1).getValueType() &&
9290 "Unexpected SHL!");
9291
9292 // Expand into a bunch of logical ops. Note that these ops
9293 // depend on the PPC behavior for oversized shift amounts.
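  // (The PPC shift instructions look at an extra bit of the shift amount, so a
  // logical shift by BitWidth..2*BitWidth-1 yields zero instead of being
  // undefined; that is what lets the Tmp3/Tmp6 terms below drop out when Amt
  // makes them inapplicable.)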
9294 SDValue Lo = Op.getOperand(0);
9295 SDValue Hi = Op.getOperand(1);
9296 SDValue Amt = Op.getOperand(2);
9297 EVT AmtVT = Amt.getValueType();
9298
9299 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9300 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9301 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9302 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9303 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9304 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9305 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9306 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9307 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9308 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9309 SDValue OutOps[] = { OutLo, OutHi };
9310 return DAG.getMergeValues(OutOps, dl);
9311}
9312
9313SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9314 EVT VT = Op.getValueType();
9315 SDLoc dl(Op);
9316  unsigned BitWidth = VT.getSizeInBits();
9317  assert(Op.getNumOperands() == 3 &&
9318 VT == Op.getOperand(1).getValueType() &&
9319 "Unexpected SRL!");
9320
9321 // Expand into a bunch of logical ops. Note that these ops
9322 // depend on the PPC behavior for oversized shift amounts.
9323 SDValue Lo = Op.getOperand(0);
9324 SDValue Hi = Op.getOperand(1);
9325 SDValue Amt = Op.getOperand(2);
9326 EVT AmtVT = Amt.getValueType();
9327
9328 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9329 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9330 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9331 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9332 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9333 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9334 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9335 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9336 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9337 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9338 SDValue OutOps[] = { OutLo, OutHi };
9339 return DAG.getMergeValues(OutOps, dl);
9340}
9341
9342SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9343 SDLoc dl(Op);
9344 EVT VT = Op.getValueType();
9345  unsigned BitWidth = VT.getSizeInBits();
9346  assert(Op.getNumOperands() == 3 &&
9347 VT == Op.getOperand(1).getValueType() &&
9348 "Unexpected SRA!");
9349
9350 // Expand into a bunch of logical ops, followed by a select_cc.
9351 SDValue Lo = Op.getOperand(0);
9352 SDValue Hi = Op.getOperand(1);
9353 SDValue Amt = Op.getOperand(2);
9354 EVT AmtVT = Amt.getValueType();
9355
9356 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9357 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9358 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9359 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9360 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9361 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9362 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9363 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9364 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9365 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9366 Tmp4, Tmp6, ISD::SETLE);
9367 SDValue OutOps[] = { OutLo, OutHi };
9368 return DAG.getMergeValues(OutOps, dl);
9369}
9370
9371SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9372 SelectionDAG &DAG) const {
9373 SDLoc dl(Op);
9374 EVT VT = Op.getValueType();
9375 unsigned BitWidth = VT.getSizeInBits();
9376
9377 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9378 SDValue X = Op.getOperand(0);
9379 SDValue Y = Op.getOperand(1);
9380 SDValue Z = Op.getOperand(2);
9381 EVT AmtVT = Z.getValueType();
9382
9383 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9384 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9385 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9386 // on PowerPC shift by BW being well defined.
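  // For example, fshl i32 x, y, 8 becomes (x << 8) | (y >> 24).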
9387 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9388 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9389 SDValue SubZ =
9390 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9391 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9392 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9393 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9394}
9395
9396//===----------------------------------------------------------------------===//
9397// Vector related lowering.
9398//
9399
9400/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9401/// element size of SplatSize. Cast the result to VT.
9402static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9403 SelectionDAG &DAG, const SDLoc &dl) {
9404 static const MVT VTys[] = { // canonical VT to use for each size.
9405 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9406 };
9407
9408 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9409
9410  // For a splat with all ones, turn it into vspltisb 0xFF to canonicalize.
9411 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9412 SplatSize = 1;
9413 Val = 0xFF;
9414 }
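  // (E.g. a v8i16 splat of 0xFFFF is emitted as a v16i8 splat of 0xFF, which a
  // single vspltisb -1 can produce.)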
9415
9416 EVT CanonicalVT = VTys[SplatSize-1];
9417
9418 // Build a canonical splat for this value.
9419 // Explicitly truncate APInt here, as this API is used with a mix of
9420 // signed and unsigned values.
9421 return DAG.getBitcast(
9422 ReqVT,
9423 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9424}
9425
9426/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9427/// specified intrinsic ID.
9428static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9429                                const SDLoc &dl, EVT DestVT = MVT::Other) {
9430 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9431 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9432 DAG.getConstant(IID, dl, MVT::i32), Op);
9433}
9434
9435/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9436/// specified intrinsic ID.
9437static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9438 SelectionDAG &DAG, const SDLoc &dl,
9439 EVT DestVT = MVT::Other) {
9440 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9441 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9442 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9443}
9444
9445/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9446/// specified intrinsic ID.
9447static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9448 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9449 EVT DestVT = MVT::Other) {
9450 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9451 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9452 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9453}
9454
9455/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9456/// amount. The result has the specified value type.
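/// For example, Amt = 4 yields the byte shuffle mask <4, 5, ..., 19>, i.e. the
/// concatenation LHS||RHS shifted left by four bytes.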
9457static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9458 SelectionDAG &DAG, const SDLoc &dl) {
9459 // Force LHS/RHS to be the right type.
9460 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9461 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9462
9463 int Ops[16];
9464 for (unsigned i = 0; i != 16; ++i)
9465 Ops[i] = i + Amt;
9466 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9467 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9468}
9469
9470/// Do we have an efficient pattern in a .td file for this node?
9471///
9472/// \param V - pointer to the BuildVectorSDNode being matched
9473/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9474///
9475/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9476/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9477/// the opposite is true (expansion is beneficial) are:
9478/// - The node builds a vector out of integers that are not 32 or 64-bits
9479/// - The node builds a vector out of constants
9480/// - The node is a "load-and-splat"
9481/// In all other cases, we will choose to keep the BUILD_VECTOR.
9482static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9483                                            bool HasDirectMove,
9484 bool HasP8Vector) {
9485 EVT VecVT = V->getValueType(0);
9486 bool RightType = VecVT == MVT::v2f64 ||
9487 (HasP8Vector && VecVT == MVT::v4f32) ||
9488 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9489 if (!RightType)
9490 return false;
9491
9492 bool IsSplat = true;
9493 bool IsLoad = false;
9494 SDValue Op0 = V->getOperand(0);
9495
9496 // This function is called in a block that confirms the node is not a constant
9497 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9498 // different constants.
9499 if (V->isConstant())
9500 return false;
9501 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9502 if (V->getOperand(i).isUndef())
9503 return false;
9504 // We want to expand nodes that represent load-and-splat even if the
9505 // loaded value is a floating point truncation or conversion to int.
9506 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9507 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9508 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9509 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9510 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9511 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9512 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9513 IsLoad = true;
9514 // If the operands are different or the input is not a load and has more
9515 // uses than just this BV node, then it isn't a splat.
9516 if (V->getOperand(i) != Op0 ||
9517 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9518 IsSplat = false;
9519 }
9520 return !(IsSplat && IsLoad);
9521}
9522
9523// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9524SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9525
9526 SDLoc dl(Op);
9527 SDValue Op0 = Op->getOperand(0);
9528
9529 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9530 (Op.getValueType() != MVT::f128))
9531 return SDValue();
9532
9533 SDValue Lo = Op0.getOperand(0);
9534 SDValue Hi = Op0.getOperand(1);
9535 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9536 return SDValue();
9537
9538 if (!Subtarget.isLittleEndian())
9539 std::swap(Lo, Hi);
9540
9541 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9542}
9543
9544static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9545 const SDValue *InputLoad = &Op;
9546 while (InputLoad->getOpcode() == ISD::BITCAST)
9547 InputLoad = &InputLoad->getOperand(0);
9548 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9549      InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9550    IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9551 InputLoad = &InputLoad->getOperand(0);
9552 }
9553 if (InputLoad->getOpcode() != ISD::LOAD)
9554 return nullptr;
9555 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9556 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9557}
9558
9559// Convert the argument APFloat to a single precision APFloat if there is no
9560// loss in information during the conversion to single precision APFloat and the
9561// resulting number is not a denormal number. Return true if successful.
9562bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9563  APFloat APFloatToConvert = ArgAPFloat;
9564 bool LosesInfo = true;
9565  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9566                           &LosesInfo);
9567 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9568 if (Success)
9569 ArgAPFloat = APFloatToConvert;
9570 return Success;
9571}
9572
9573// Bitcast the argument APInt to a double and convert it to a single precision
9574// APFloat, bitcast the APFloat to an APInt and assign it to the original
9575// argument if there is no loss in information during the conversion from
9576// double to single precision APFloat and the resulting number is not a denormal
9577// number. Return true if successful.
9578 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9579 double DpValue = ArgAPInt.bitsToDouble();
9580 APFloat APFloatDp(DpValue);
9581 bool Success = convertToNonDenormSingle(APFloatDp);
9582 if (Success)
9583 ArgAPInt = APFloatDp.bitcastToAPInt();
9584 return Success;
9585}
9586
9587 // Nondestructive check for convertToNonDenormSingle.
9588 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9589 // Only convert if it loses info, since XXSPLTIDP should
9590 // handle the other case.
9591 APFloat APFloatToConvert = ArgAPFloat;
9592 bool LosesInfo = true;
9593 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9594 &LosesInfo);
9595
9596 return (!LosesInfo && !APFloatToConvert.isDenormal());
9597}
9598
9599static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9600 unsigned &Opcode) {
9601 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9602 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9603 return false;
9604
9605 EVT Ty = Op->getValueType(0);
9606 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9607 // as we cannot handle extending loads for these types.
9608 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9609 ISD::isNON_EXTLoad(InputNode))
9610 return true;
9611
9612 EVT MemVT = InputNode->getMemoryVT();
9613 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9614 // memory VT is the same as the vector element VT.
9615 // The loads feeding into the v8i16 and v16i8 types will be extending because
9616 // scalar i8/i16 are not legal types.
9617 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9618 (MemVT == Ty.getVectorElementType()))
9619 return true;
9620
9621 if (Ty == MVT::v2i64) {
9622 // Check the extend type, when the input type is i32, and the output vector
9623 // type is v2i64.
9624 if (MemVT == MVT::i32) {
9625 if (ISD::isZEXTLoad(InputNode))
9626 Opcode = PPCISD::ZEXT_LD_SPLAT;
9627 if (ISD::isSEXTLoad(InputNode))
9628 Opcode = PPCISD::SEXT_LD_SPLAT;
9629 }
9630 return true;
9631 }
9632 return false;
9633}
9634
9635 static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9636 bool IsLittleEndian) {
9637 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9638
9639 BitMask.clearAllBits();
9640 EVT VT = BVN.getValueType(0);
9641 unsigned VTSize = VT.getSizeInBits();
9642 APInt ConstValue(VTSize, 0);
9643
9644 unsigned EltWidth = VT.getScalarSizeInBits();
9645
9646 unsigned BitPos = 0;
9647 for (auto OpVal : BVN.op_values()) {
9648 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9649
9650 if (!CN)
9651 return false;
9652 // The byte order of the elements in a vector register is reversed
9653 // between little-endian and big-endian modes.
9654 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9655 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9656 BitPos += EltWidth;
9657 }
9658
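// MTVSRBMI produces a vector in which each byte is either 0x00 or 0xFF,
// selected by one bit of its 16-bit immediate. The constant is therefore
// only representable if every byte is one of those two values; remember the
// 0xFF bytes in BitMask.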
9659 for (unsigned J = 0; J < 16; ++J) {
9660 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9661 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9662 return false;
9663 if (ExtractValue == 0xFF)
9664 BitMask.setBit(J);
9665 }
9666 return true;
9667}
9668
9669// If this is a case we can't handle, return null and let the default
9670// expansion code take care of it. If we CAN select this case, and if it
9671// selects to a single instruction, return Op. Otherwise, if we can codegen
9672// this case more efficiently than a constant pool load, lower it to the
9673// sequence of ops that should be used.
9674SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9675 SelectionDAG &DAG) const {
9676 SDLoc dl(Op);
9677 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9678 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9679
9680 if (Subtarget.hasP10Vector()) {
9681 APInt BitMask(32, 0);
9682 // If the value of the vector is all zeros or all ones,
9683 // we do not convert it to MTVSRBMI.
9684 // The xxleqv instruction sets a vector with all ones.
9685 // The xxlxor instruction sets a vector with all zeros.
9686 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9687 BitMask != 0 && BitMask != 0xffff) {
9688 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9689 MachineSDNode *MSDNode =
9690 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9691 SDValue SDV = SDValue(MSDNode, 0);
9692 EVT DVT = BVN->getValueType(0);
9693 EVT SVT = SDV.getValueType();
9694 if (SVT != DVT) {
9695 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9696 }
9697 return SDV;
9698 }
9699 }
9700 // Check if this is a splat of a constant value.
9701 APInt APSplatBits, APSplatUndef;
9702 unsigned SplatBitSize;
9703 bool HasAnyUndefs;
9704 bool BVNIsConstantSplat =
9705 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9706 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9707
9708 // If it is a splat of a double, check if we can shrink it to a 32 bit
9709 // non-denormal float which when converted back to double gives us the same
9710 // double. This is to exploit the XXSPLTIDP instruction.
9711 // If we lose precision, we use XXSPLTI32DX.
9712 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9713 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9714 // Check the type first to short-circuit so we don't modify APSplatBits if
9715 // this block isn't executed.
9716 if ((Op->getValueType(0) == MVT::v2f64) &&
9717 convertToNonDenormSingle(APSplatBits)) {
9718 SDValue SplatNode = DAG.getNode(
9719 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9720 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9721 return DAG.getBitcast(Op.getValueType(), SplatNode);
9722 } else {
9723 // We may lose precision, so we have to use XXSPLTI32DX.
9724
9725 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9726 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9727 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9728
9729 if (!Hi || !Lo)
9730 // If either word is 0, then we should generate XXLXOR to set it to 0.
9731 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9732
9733 if (Hi)
9734 SplatNode = DAG.getNode(
9735 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9736 DAG.getTargetConstant(0, dl, MVT::i32),
9737 DAG.getTargetConstant(Hi, dl, MVT::i32));
9738
9739 if (Lo)
9740 SplatNode =
9741 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9742 DAG.getTargetConstant(1, dl, MVT::i32),
9743 DAG.getTargetConstant(Lo, dl, MVT::i32));
9744
9745 return DAG.getBitcast(Op.getValueType(), SplatNode);
9746 }
9747 }
9748
9749 bool IsSplat64 = false;
9750 uint64_t SplatBits = 0;
9751 int32_t SextVal = 0;
9752 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9753 SplatBits = APSplatBits.getZExtValue();
9754 if (SplatBitSize <= 32) {
9755 SextVal = SignExtend32(SplatBits, SplatBitSize);
9756 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9757 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9758 bool P9Vector = Subtarget.hasP9Vector();
9759 int32_t Hi = P9Vector ? 127 : 15;
9760 int32_t Lo = P9Vector ? -128 : -16;
9761 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9762 SextVal = static_cast<int32_t>(SplatBits);
9763 }
9764 }
9765
9766 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9767 unsigned NewOpcode = PPCISD::LD_SPLAT;
9768
9769 // Handle load-and-splat patterns as we have instructions that will do this
9770 // in one go.
9771 if (DAG.isSplatValue(Op, true) &&
9772 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9773 const SDValue *InputLoad = &Op.getOperand(0);
9774 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9775
9776 // If the input load is an extending load, it will be an i32 -> i64
9777 // extending load and isValidSplatLoad() will update NewOpcode.
9778 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9779 unsigned ElementSize =
9780 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9781
9782 assert(((ElementSize == 2 * MemorySize)
9783 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9784 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9785 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9786 "Unmatched element size and opcode!\n");
9787
9788 // Checking for a single use of this load, we have to check for vector
9789 // width (128 bits) / ElementSize uses (since each operand of the
9790 // BUILD_VECTOR is a separate use of the value).
9791 unsigned NumUsesOfInputLD = 128 / ElementSize;
9792 for (SDValue BVInOp : Op->ops())
9793 if (BVInOp.isUndef())
9794 NumUsesOfInputLD--;
9795
9796 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9797 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9798 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9799 // 15", but function isValidSplatLoad() will only return true when
9800 // the data at index 0 is not nullptr. So we will not get into trouble for
9801 // these cases.
9802 //
9803 // case 1 - lfiwzx/lfiwax
9804 // 1.1: load result is i32 and is sign/zero extend to i64;
9805 // 1.2: build a v2i64 vector type with above loaded value;
9806 // 1.3: the vector has only one value at index 0, others are all undef;
9807 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9808 if (NumUsesOfInputLD == 1 &&
9809 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9810 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9811 Subtarget.hasLFIWAX()))
9812 return SDValue();
9813
9814 // case 2 - lxvr[hb]x
9815 // 2.1: load result is at most i16;
9816 // 2.2: build a vector with above loaded value;
9817 // 2.3: the vector has only one value at index 0, others are all undef;
9818 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9819 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9820 Subtarget.isISA3_1() && ElementSize <= 16)
9821 return SDValue();
9822
9823 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9824 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9825 Subtarget.hasVSX()) {
9826 SDValue Ops[] = {
9827 LD->getChain(), // Chain
9828 LD->getBasePtr(), // Ptr
9829 DAG.getValueType(Op.getValueType()) // VT
9830 };
9831 SDValue LdSplt = DAG.getMemIntrinsicNode(
9832 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9833 LD->getMemoryVT(), LD->getMemOperand());
9834 // Replace all uses of the output chain of the original load with the
9835 // output chain of the new load.
9836 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9837 LdSplt.getValue(1));
9838 return LdSplt;
9839 }
9840 }
9841
9842 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9843 // 32-bits can be lowered to VSX instructions under certain conditions.
9844 // Without VSX, there is no pattern more efficient than expanding the node.
9845 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9846 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9847 Subtarget.hasP8Vector()))
9848 return Op;
9849 return SDValue();
9850 }
9851
9852 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9853 unsigned SplatSize = SplatBitSize / 8;
9854
9855 // First, handle single instruction cases.
9856
9857 // All zeros?
9858 if (SplatBits == 0) {
9859 // Canonicalize all zero vectors to be v4i32.
9860 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9861 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9862 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9863 }
9864 return Op;
9865 }
9866
9867 // We have XXSPLTIW for constant splats four bytes wide.
9868 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9869 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9870 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9871 // turned into a 4-byte splat of 0xABABABAB.
9872 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9873 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9874 Op.getValueType(), DAG, dl);
9875
9876 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9877 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9878 dl);
9879
9880 // We have XXSPLTIB for constant splats one byte wide.
9881 if (Subtarget.hasP9Vector() && SplatSize == 1)
9882 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9883 dl);
9884
9885 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9886 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9887 if (SextVal >= -16 && SextVal <= 15) {
9888 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9889 // generate a splat word with extend for size 8.
9890 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9891 SDValue Res =
9892 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9893 if (SplatSize != 8)
9894 return Res;
9895 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9896 }
9897
9898 // Two instruction sequences.
9899
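// On Power9, any splat value that fits in a signed byte can be built with a
// byte splat (xxspltib) followed by a sign-extending unpack or extend
// (vupklsb / vextsb2w / vextsb2d) to reach the element width.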
9900 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9901 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9902 SmallVector<SDValue, 16> Ops(16, C);
9903 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9904 unsigned IID;
9905 switch (SplatSize) {
9906 default:
9907 llvm_unreachable("Unexpected type for vector constant.");
9908 case 2:
9909 IID = Intrinsic::ppc_altivec_vupklsb;
9910 break;
9911 case 4:
9912 IID = Intrinsic::ppc_altivec_vextsb2w;
9913 break;
9914 case 8:
9915 IID = Intrinsic::ppc_altivec_vextsb2d;
9916 break;
9917 }
9918 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl);
9919 return DAG.getBitcast(Op->getValueType(0), Extend);
9920 }
9921 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9922
9923 // If this value is in the range [-32,30] and is even, use:
9924 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9925 // If this value is in the range [17,31] and is odd, use:
9926 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9927 // If this value is in the range [-31,-17] and is odd, use:
9928 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9929 // Note the last two are three-instruction sequences.
9930 if (SextVal >= -32 && SextVal <= 31) {
9931 // To avoid having these optimizations undone by constant folding,
9932 // we convert to a pseudo that will be expanded later into one of
9933 // the above forms.
9934 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9935 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9936 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9937 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9938 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9939 if (VT == Op.getValueType())
9940 return RetVal;
9941 else
9942 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9943 }
9944
9945 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9946 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9947 // for fneg/fabs.
9948 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9949 // Make -1 and vspltisw -1:
9950 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9951
9952 // Make the VSLW intrinsic, computing 0x8000_0000.
9953 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9954 OnesV, DAG, dl);
9955
9956 // xor by OnesV to invert it.
9957 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9958 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9959 }
9960
9961 // Check to see if this is a wide variety of vsplti*, binop self cases.
9962 static const signed char SplatCsts[] = {
9963 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9964 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9965 };
9966
9967 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9968 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9969 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9970 int i = SplatCsts[idx];
9971
9972 // Figure out what shift amount will be used by altivec if shifted by i in
9973 // this splat size.
9974 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9975
9976 // vsplti + shl self.
9977 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9978 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9979 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9980 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9981 Intrinsic::ppc_altivec_vslw
9982 };
9983 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9984 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9985 }
9986
9987 // vsplti + srl self.
9988 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9989 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9990 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9991 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9992 Intrinsic::ppc_altivec_vsrw
9993 };
9994 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9995 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9996 }
9997
9998 // vsplti + rol self.
9999 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
10000 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
10001 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
10002 static const unsigned IIDs[] = { // Intrinsic to use for each size.
10003 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
10004 Intrinsic::ppc_altivec_vrlw
10005 };
10006 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10007 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10008 }
10009
10010 // t = vsplti c, result = vsldoi t, t, 1
10011 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
10012 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10013 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
10014 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10015 }
10016 // t = vsplti c, result = vsldoi t, t, 2
10017 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
10018 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10019 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
10020 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10021 }
10022 // t = vsplti c, result = vsldoi t, t, 3
10023 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
10024 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10025 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
10026 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10027 }
10028 }
10029
10030 return SDValue();
10031}
10032
10033/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
10034/// the specified operations to build the shuffle.
10035static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
10036 SDValue RHS, SelectionDAG &DAG,
10037 const SDLoc &dl) {
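// Each perfect-shuffle table entry packs the cost into bits 31-30, the
// operation into bits 29-26 and two 13-bit operand indices into the
// remaining bits.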
10038 unsigned OpNum = (PFEntry >> 26) & 0x0F;
10039 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
10040 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
10041
10042 enum {
10043 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
10044 OP_VMRGHW,
10045 OP_VMRGLW,
10046 OP_VSPLTISW0,
10047 OP_VSPLTISW1,
10048 OP_VSPLTISW2,
10049 OP_VSPLTISW3,
10050 OP_VSLDOI4,
10051 OP_VSLDOI8,
10052 OP_VSLDOI12
10053 };
10054
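// For OP_COPY the operand IDs encode the four 32-bit element indices in
// base 9 (digit 8 meaning undef): digits 0,1,2,3 select the LHS unchanged
// and 4,5,6,7 select the RHS.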
10055 if (OpNum == OP_COPY) {
10056 if (LHSID == (1*9+2)*9+3) return LHS;
10057 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
10058 return RHS;
10059 }
10060
10061 SDValue OpLHS, OpRHS;
10062 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
10063 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
10064
10065 int ShufIdxs[16];
10066 switch (OpNum) {
10067 default: llvm_unreachable("Unknown i32 permute!");
10068 case OP_VMRGHW:
10069 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
10070 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
10071 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
10072 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
10073 break;
10074 case OP_VMRGLW:
10075 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
10076 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
10077 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
10078 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
10079 break;
10080 case OP_VSPLTISW0:
10081 for (unsigned i = 0; i != 16; ++i)
10082 ShufIdxs[i] = (i&3)+0;
10083 break;
10084 case OP_VSPLTISW1:
10085 for (unsigned i = 0; i != 16; ++i)
10086 ShufIdxs[i] = (i&3)+4;
10087 break;
10088 case OP_VSPLTISW2:
10089 for (unsigned i = 0; i != 16; ++i)
10090 ShufIdxs[i] = (i&3)+8;
10091 break;
10092 case OP_VSPLTISW3:
10093 for (unsigned i = 0; i != 16; ++i)
10094 ShufIdxs[i] = (i&3)+12;
10095 break;
10096 case OP_VSLDOI4:
10097 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
10098 case OP_VSLDOI8:
10099 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
10100 case OP_VSLDOI12:
10101 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
10102 }
10103 EVT VT = OpLHS.getValueType();
10104 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
10105 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
10106 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
10107 return DAG.getNode(ISD::BITCAST, dl, VT, T);
10108}
10109
10110/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10111/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10112/// SDValue.
10113SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10114 SelectionDAG &DAG) const {
10115 const unsigned BytesInVector = 16;
10116 bool IsLE = Subtarget.isLittleEndian();
10117 SDLoc dl(N);
10118 SDValue V1 = N->getOperand(0);
10119 SDValue V2 = N->getOperand(1);
10120 unsigned ShiftElts = 0, InsertAtByte = 0;
10121 bool Swap = false;
10122
10123 // Shifts required to get the byte we want at element 7.
10124 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10125 0, 15, 14, 13, 12, 11, 10, 9};
10126 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10127 1, 2, 3, 4, 5, 6, 7, 8};
10128
10129 ArrayRef<int> Mask = N->getMask();
10130 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10131
10132 // For each mask element, find out if we're just inserting something
10133 // from V2 into V1 or vice versa.
10134 // Possible permutations inserting an element from V2 into V1:
10135 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10136 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10137 // ...
10138 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10139 // Inserting from V1 into V2 will be similar, except mask range will be
10140 // [16,31].
10141
10142 bool FoundCandidate = false;
10143 // If both vector operands for the shuffle are the same vector, the mask
10144 // will contain only elements from the first one and the second one will be
10145 // undef.
10146 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10147 // Go through the mask of bytes to find an element that's being moved
10148 // from one vector to the other.
10149 for (unsigned i = 0; i < BytesInVector; ++i) {
10150 unsigned CurrentElement = Mask[i];
10151 // If 2nd operand is undefined, we should only look for element 7 in the
10152 // Mask.
10153 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10154 continue;
10155
10156 bool OtherElementsInOrder = true;
10157 // Examine the other elements in the Mask to see if they're in original
10158 // order.
10159 for (unsigned j = 0; j < BytesInVector; ++j) {
10160 if (j == i)
10161 continue;
10162 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
10163 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
10164 // in which case we always assume we're picking from the 1st operand.
10165 int MaskOffset =
10166 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10167 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10168 OtherElementsInOrder = false;
10169 break;
10170 }
10171 }
10172 // If other elements are in original order, we record the number of shifts
10173 // we need to get the element we want into element 7. Also record which byte
10174 // in the vector we should insert into.
10175 if (OtherElementsInOrder) {
10176 // If 2nd operand is undefined, we assume no shifts and no swapping.
10177 if (V2.isUndef()) {
10178 ShiftElts = 0;
10179 Swap = false;
10180 } else {
10181 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10182 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10183 : BigEndianShifts[CurrentElement & 0xF];
10184 Swap = CurrentElement < BytesInVector;
10185 }
10186 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10187 FoundCandidate = true;
10188 break;
10189 }
10190 }
10191
10192 if (!FoundCandidate)
10193 return SDValue();
10194
10195 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10196 // optionally with VECSHL if shift is required.
10197 if (Swap)
10198 std::swap(V1, V2);
10199 if (V2.isUndef())
10200 V2 = V1;
10201 if (ShiftElts) {
10202 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10203 DAG.getConstant(ShiftElts, dl, MVT::i32));
10204 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10205 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10206 }
10207 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10208 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10209}
10210
10211/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10212/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10213/// SDValue.
10214SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10215 SelectionDAG &DAG) const {
10216 const unsigned NumHalfWords = 8;
10217 const unsigned BytesInVector = NumHalfWords * 2;
10218 // Check that the shuffle is on half-words.
10219 if (!isNByteElemShuffleMask(N, 2, 1))
10220 return SDValue();
10221
10222 bool IsLE = Subtarget.isLittleEndian();
10223 SDLoc dl(N);
10224 SDValue V1 = N->getOperand(0);
10225 SDValue V2 = N->getOperand(1);
10226 unsigned ShiftElts = 0, InsertAtByte = 0;
10227 bool Swap = false;
10228
10229 // Shifts required to get the half-word we want at element 3.
10230 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10231 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10232
10233 uint32_t Mask = 0;
10234 uint32_t OriginalOrderLow = 0x1234567;
10235 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10236 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10237 // 32-bit space, only need 4-bit nibbles per element.
10238 for (unsigned i = 0; i < NumHalfWords; ++i) {
10239 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10240 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10241 }
10242
10243 // For each mask element, find out if we're just inserting something
10244 // from V2 into V1 or vice versa. Possible permutations inserting an element
10245 // from V2 into V1:
10246 // X, 1, 2, 3, 4, 5, 6, 7
10247 // 0, X, 2, 3, 4, 5, 6, 7
10248 // 0, 1, X, 3, 4, 5, 6, 7
10249 // 0, 1, 2, X, 4, 5, 6, 7
10250 // 0, 1, 2, 3, X, 5, 6, 7
10251 // 0, 1, 2, 3, 4, X, 6, 7
10252 // 0, 1, 2, 3, 4, 5, X, 7
10253 // 0, 1, 2, 3, 4, 5, 6, X
10254 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10255
10256 bool FoundCandidate = false;
10257 // Go through the mask of half-words to find an element that's being moved
10258 // from one vector to the other.
10259 for (unsigned i = 0; i < NumHalfWords; ++i) {
10260 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10261 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10262 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10263 uint32_t TargetOrder = 0x0;
10264
10265 // If both vector operands for the shuffle are the same vector, the mask
10266 // will contain only elements from the first one and the second one will be
10267 // undef.
10268 if (V2.isUndef()) {
10269 ShiftElts = 0;
10270 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10271 TargetOrder = OriginalOrderLow;
10272 Swap = false;
10273 // Skip if this is not the correct element or the mask of the other
10274 // elements doesn't match our expected order.
10275 if (MaskOneElt == VINSERTHSrcElem &&
10276 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10277 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10278 FoundCandidate = true;
10279 break;
10280 }
10281 } else { // If both operands are defined.
10282 // Target order is [8,15] if the current mask is between [0,7].
10283 TargetOrder =
10284 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10285 // Skip if the mask of the other elements doesn't match our expected order.
10286 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10287 // We only need the last 3 bits for the number of shifts.
10288 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10289 : BigEndianShifts[MaskOneElt & 0x7];
10290 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10291 Swap = MaskOneElt < NumHalfWords;
10292 FoundCandidate = true;
10293 break;
10294 }
10295 }
10296 }
10297
10298 if (!FoundCandidate)
10299 return SDValue();
10300
10301 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10302 // optionally with VECSHL if shift is required.
10303 if (Swap)
10304 std::swap(V1, V2);
10305 if (V2.isUndef())
10306 V2 = V1;
10307 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10308 if (ShiftElts) {
10309 // Double ShiftElts because we're left shifting on v16i8 type.
10310 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10311 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10312 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10313 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10314 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10315 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10316 }
10317 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10318 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10319 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10320 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10321}
10322
10323/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10324/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10325/// return the default SDValue.
10326SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10327 SelectionDAG &DAG) const {
10328 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10329 // to v16i8. Peek through the bitcasts to get the actual operands.
10330 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10331 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10332
10333 auto ShuffleMask = SVN->getMask();
10334 SDValue VecShuffle(SVN, 0);
10335 SDLoc DL(SVN);
10336
10337 // Check that we have a four byte shuffle.
10338 if (!isNByteElemShuffleMask(SVN, 4, 1))
10339 return SDValue();
10340
10341 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10342 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10343 std::swap(LHS, RHS);
10344 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10345 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10346 if (!CommutedSV)
10347 return SDValue();
10348 ShuffleMask = CommutedSV->getMask();
10349 }
10350
10351 // Ensure that the RHS is a vector of constants.
10352 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10353 if (!BVN)
10354 return SDValue();
10355
10356 // Check if RHS is a splat of 4-bytes (or smaller).
10357 APInt APSplatValue, APSplatUndef;
10358 unsigned SplatBitSize;
10359 bool HasAnyUndefs;
10360 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10361 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10362 SplatBitSize > 32)
10363 return SDValue();
10364
10365 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10366 // The instruction splats a constant C into two words of the source vector
10367 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10368 // Thus we check that the shuffle mask is the equivalent of
10369 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10370 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10371 // within each word are consecutive, so we only need to check the first byte.
10372 SDValue Index;
10373 bool IsLE = Subtarget.isLittleEndian();
10374 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10375 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10376 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10377 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10378 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10379 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10380 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10381 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10382 else
10383 return SDValue();
10384
10385 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10386 // for XXSPLTI32DX.
10387 unsigned SplatVal = APSplatValue.getZExtValue();
10388 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10389 SplatVal |= (SplatVal << SplatBitSize);
10390
10391 SDValue SplatNode = DAG.getNode(
10392 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10393 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10394 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10395}
10396
10397/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10398/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10399/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10400/// i.e (or (shl x, C1), (srl x, 128-C1)).
10401SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10402 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10403 assert(Op.getValueType() == MVT::v1i128 &&
10404 "Only set v1i128 as custom, other type shouldn't reach here!");
10405 SDLoc dl(Op);
10406 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10407 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10408 unsigned SHLAmt = N1.getConstantOperandVal(0);
10409 if (SHLAmt % 8 == 0) {
10410 std::array<int, 16> Mask;
10411 std::iota(Mask.begin(), Mask.end(), 0);
10412 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10413 if (SDValue Shuffle =
10414 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10415 DAG.getUNDEF(MVT::v16i8), Mask))
10416 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10417 }
10418 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10419 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10420 DAG.getConstant(SHLAmt, dl, MVT::i32));
10421 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10422 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10423 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10424 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10425}
10426
10427/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10428/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10429/// return the code it can be lowered into. Worst case, it can always be
10430/// lowered into a vperm.
10431SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10432 SelectionDAG &DAG) const {
10433 SDLoc dl(Op);
10434 SDValue V1 = Op.getOperand(0);
10435 SDValue V2 = Op.getOperand(1);
10436 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10437
10438 // Any nodes that were combined in the target-independent combiner prior
10439 // to vector legalization will not be sent to the target combine. Try to
10440 // combine it here.
10441 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10442 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10443 return NewShuffle;
10444 Op = NewShuffle;
10445 SVOp = cast<ShuffleVectorSDNode>(Op);
10446 V1 = Op.getOperand(0);
10447 V2 = Op.getOperand(1);
10448 }
10449 EVT VT = Op.getValueType();
10450 bool isLittleEndian = Subtarget.isLittleEndian();
10451
10452 unsigned ShiftElts, InsertAtByte;
10453 bool Swap = false;
10454
10455 // If this is a load-and-splat, we can do that with a single instruction
10456 // in some cases. However if the load has multiple uses, we don't want to
10457 // combine it because that will just produce multiple loads.
10458 bool IsPermutedLoad = false;
10459 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10460 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10461 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10462 InputLoad->hasOneUse()) {
10463 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10464 int SplatIdx =
10465 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10466
10467 // The splat index for permuted loads will be in the left half of the vector
10468 // which is strictly wider than the loaded value by 8 bytes. So we need to
10469 // adjust the splat index to point to the correct address in memory.
10470 if (IsPermutedLoad) {
10471 assert((isLittleEndian || IsFourByte) &&
10472 "Unexpected size for permuted load on big endian target");
10473 SplatIdx += IsFourByte ? 2 : 1;
10474 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10475 "Splat of a value outside of the loaded memory");
10476 }
10477
10478 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10479 // For 4-byte load-and-splat, we need Power9.
10480 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10481 uint64_t Offset = 0;
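// Compute the byte offset of the splatted element within the loaded value;
// on little-endian targets the register element order is reversed relative
// to memory order, so the splat index is mirrored.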
10482 if (IsFourByte)
10483 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10484 else
10485 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10486
10487 // If the width of the load is the same as the width of the splat,
10488 // loading with an offset would load the wrong memory.
10489 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10490 Offset = 0;
10491
10492 SDValue BasePtr = LD->getBasePtr();
10493 if (Offset != 0)
10494 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10495 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10496 SDValue Ops[] = {
10497 LD->getChain(), // Chain
10498 BasePtr, // BasePtr
10499 DAG.getValueType(Op.getValueType()) // VT
10500 };
10501 SDVTList VTL =
10502 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10503 SDValue LdSplt =
10504 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10505 Ops, LD->getMemoryVT(), LD->getMemOperand());
10506 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10507 if (LdSplt.getValueType() != SVOp->getValueType(0))
10508 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10509 return LdSplt;
10510 }
10511 }
10512
10513 // All v2i64 and v2f64 shuffles are legal
10514 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10515 return Op;
10516
10517 if (Subtarget.hasP9Vector() &&
10518 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10519 isLittleEndian)) {
10520 if (V2.isUndef())
10521 V2 = V1;
10522 else if (Swap)
10523 std::swap(V1, V2);
10524 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10525 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10526 if (ShiftElts) {
10527 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10528 DAG.getConstant(ShiftElts, dl, MVT::i32));
10529 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10530 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10531 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10532 }
10533 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10534 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10535 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10536 }
10537
10538 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10539 SDValue SplatInsertNode;
10540 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10541 return SplatInsertNode;
10542 }
10543
10544 if (Subtarget.hasP9Altivec()) {
10545 SDValue NewISDNode;
10546 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10547 return NewISDNode;
10548
10549 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10550 return NewISDNode;
10551 }
10552
10553 if (Subtarget.hasVSX() &&
10554 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10555 if (Swap)
10556 std::swap(V1, V2);
10557 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10558 SDValue Conv2 =
10559 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10560
10561 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10562 DAG.getConstant(ShiftElts, dl, MVT::i32));
10563 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10564 }
10565
10566 if (Subtarget.hasVSX() &&
10567 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10568 if (Swap)
10569 std::swap(V1, V2);
10570 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10571 SDValue Conv2 =
10572 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10573
10574 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10575 DAG.getConstant(ShiftElts, dl, MVT::i32));
10576 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10577 }
10578
10579 if (Subtarget.hasP9Vector()) {
10580 if (PPC::isXXBRHShuffleMask(SVOp)) {
10581 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10582 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10583 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10584 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10585 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10586 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10587 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10588 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10589 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10590 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10591 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10592 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10593 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10594 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10595 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10596 }
10597 }
10598
10599 if (Subtarget.hasVSX()) {
10600 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10601 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10602
10603 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10604 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10605 DAG.getConstant(SplatIdx, dl, MVT::i32));
10606 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10607 }
10608
10609 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10610 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10611 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10612 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10613 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10614 }
10615 }
10616
10617 // Cases that are handled by instructions that take permute immediates
10618 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10619 // selected by the instruction selector.
10620 if (V2.isUndef()) {
10621 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10622 PPC::isSplatShuffleMask(SVOp, 2) ||
10623 PPC::isSplatShuffleMask(SVOp, 4) ||
10624 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10625 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10626 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10627 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10628 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10629 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10630 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10631 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10632 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10633 (Subtarget.hasP8Altivec() && (
10634 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10635 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10636 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10637 return Op;
10638 }
10639 }
10640
10641 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10642 // and produce a fixed permutation. If any of these match, do not lower to
10643 // VPERM.
10644 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10645 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10646 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10647 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10648 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10649 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10650 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10651 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10652 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10653 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10654 (Subtarget.hasP8Altivec() && (
10655 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10656 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10657 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10658 return Op;
10659
10660 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10661 // perfect shuffle table to emit an optimal matching sequence.
10662 ArrayRef<int> PermMask = SVOp->getMask();
10663
10664 if (!DisablePerfectShuffle && !isLittleEndian) {
10665 unsigned PFIndexes[4];
10666 bool isFourElementShuffle = true;
10667 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10668 ++i) { // Element number
10669 unsigned EltNo = 8; // Start out undef.
10670 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10671 if (PermMask[i * 4 + j] < 0)
10672 continue; // Undef, ignore it.
10673
10674 unsigned ByteSource = PermMask[i * 4 + j];
10675 if ((ByteSource & 3) != j) {
10676 isFourElementShuffle = false;
10677 break;
10678 }
10679
10680 if (EltNo == 8) {
10681 EltNo = ByteSource / 4;
10682 } else if (EltNo != ByteSource / 4) {
10683 isFourElementShuffle = false;
10684 break;
10685 }
10686 }
10687 PFIndexes[i] = EltNo;
10688 }
10689
10690 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10691 // perfect shuffle vector to determine if it is cost effective to do this as
10692 // discrete instructions, or whether we should use a vperm.
10693 // For now, we skip this for little endian until such time as we have a
10694 // little-endian perfect shuffle table.
10695 if (isFourElementShuffle) {
10696 // Compute the index in the perfect shuffle table.
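// Each PFIndexes value is in [0, 8] (8 meaning undef), so the index is
// formed in base 9.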
10697 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10698 PFIndexes[2] * 9 + PFIndexes[3];
10699
10700 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10701 unsigned Cost = (PFEntry >> 30);
10702
10703 // Determining when to avoid vperm is tricky. Many things affect the cost
10704 // of vperm, particularly how many times the perm mask needs to be
10705 // computed. For example, if the perm mask can be hoisted out of a loop or
10706 // is already used (perhaps because there are multiple permutes with the
10707 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10708 // permute mask out of the loop requires an extra register.
10709 //
10710 // As a compromise, we only emit discrete instructions if the shuffle can
10711 // be generated in 3 or fewer operations. When we have loop information
10712 // available, if this block is within a loop, we should avoid using vperm
10713 // for 3-operation perms and use a constant pool load instead.
10714 if (Cost < 3)
10715 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10716 }
10717 }
10718
10719 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10720 // vector that will get spilled to the constant pool.
10721 if (V2.isUndef()) V2 = V1;
10722
10723 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10724}
10725
10726SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10727 ArrayRef<int> PermMask, EVT VT,
10728 SDValue V1, SDValue V2) const {
10729 unsigned Opcode = PPCISD::VPERM;
10730 EVT ValType = V1.getValueType();
10731 SDLoc dl(Op);
10732 bool NeedSwap = false;
10733 bool isLittleEndian = Subtarget.isLittleEndian();
10734 bool isPPC64 = Subtarget.isPPC64();
10735
10736 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10737 (V1->hasOneUse() || V2->hasOneUse())) {
10738 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10739 "XXPERM instead\n");
10740 Opcode = PPCISD::XXPERM;
10741
10742 // The second input to XXPERM is also an output so if the second input has
10743 // multiple uses then copying is necessary, as a result we want the
10744 // single-use operand to be used as the second input to prevent copying.
10745 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10746 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10747 std::swap(V1, V2);
10748 NeedSwap = !NeedSwap;
10749 }
10750 }
10751
10752 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10753 // that it is in input element units, not in bytes. Convert now.
10754
10755 // For little endian, the order of the input vectors is reversed, and
10756 // the permutation mask is complemented with respect to 31. This is
10757 // necessary to produce proper semantics with the big-endian-based vperm
10758 // instruction.
10759 EVT EltVT = V1.getValueType().getVectorElementType();
10760 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10761
10762 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10763 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10764
10765 /*
10766 Vectors will be appended like so: [ V1 | V2 ]
10767 XXSWAPD on V1:
10768 [ A | B | C | D ] -> [ C | D | A | B ]
10769 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10770 i.e. index of A, B += 8, and index of C, D -= 8.
10771 XXSWAPD on V2:
10772 [ E | F | G | H ] -> [ G | H | E | F ]
10773 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10774 i.e. index of E, F += 8, index of G, H -= 8
10775 Swap V1 and V2:
10776 [ V1 | V2 ] -> [ V2 | V1 ]
10777 0-15 16-31 0-15 16-31
10778 i.e. index of V1 += 16, index of V2 -= 16
10779 */
10780
10781 SmallVector<SDValue, 16> ResultMask;
10782 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10783 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10784
10785 if (V1HasXXSWAPD) {
10786 if (SrcElt < 8)
10787 SrcElt += 8;
10788 else if (SrcElt < 16)
10789 SrcElt -= 8;
10790 }
10791 if (V2HasXXSWAPD) {
10792 if (SrcElt > 23)
10793 SrcElt -= 8;
10794 else if (SrcElt > 15)
10795 SrcElt += 8;
10796 }
10797 if (NeedSwap) {
10798 if (SrcElt < 16)
10799 SrcElt += 16;
10800 else
10801 SrcElt -= 16;
10802 }
10803 for (unsigned j = 0; j != BytesPerElement; ++j)
10804 if (isLittleEndian)
10805 ResultMask.push_back(
10806 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10807 else
10808 ResultMask.push_back(
10809 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10810 }
10811
10812 if (V1HasXXSWAPD) {
10813 dl = SDLoc(V1->getOperand(0));
10814 V1 = V1->getOperand(0)->getOperand(1);
10815 }
10816 if (V2HasXXSWAPD) {
10817 dl = SDLoc(V2->getOperand(0));
10818 V2 = V2->getOperand(0)->getOperand(1);
10819 }
10820
10821 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10822 if (ValType != MVT::v2f64)
10823 V1 = DAG.getBitcast(MVT::v2f64, V1);
10824 if (V2.getValueType() != MVT::v2f64)
10825 V2 = DAG.getBitcast(MVT::v2f64, V2);
10826 }
10827
10828 ShufflesHandledWithVPERM++;
10829 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10830 LLVM_DEBUG({
10831 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10832 if (Opcode == PPCISD::XXPERM) {
10833 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10834 } else {
10835 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10836 }
10837 SVOp->dump();
10838 dbgs() << "With the following permute control vector:\n";
10839 VPermMask.dump();
10840 });
10841
10842 if (Opcode == PPCISD::XXPERM)
10843 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10844
10845 // For little endian we only need to swap the two inputs here; the permute
10846 // mask was already computed above to account for this.
10847 if (isLittleEndian)
10848 std::swap(V1, V2);
10849
10850 SDValue VPERMNode =
10851 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10852
10853 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10854 return VPERMNode;
10855}
10856
10857/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10858/// vector comparison. If it is, return true and fill in Opc/isDot with
10859/// information about the intrinsic.
10860static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10861 bool &isDot, const PPCSubtarget &Subtarget) {
10862 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10863 CompareOpc = -1;
10864 isDot = false;
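// For each intrinsic we record the extended-opcode field of the matching
// VCMP*/XVCMP* instruction in CompareOpc; isDot is set for the predicate
// intrinsics, which map to the record (dot) forms.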
10865 switch (IntrinsicID) {
10866 default:
10867 return false;
10868 // Comparison predicates.
10869 case Intrinsic::ppc_altivec_vcmpbfp_p:
10870 CompareOpc = 966;
10871 isDot = true;
10872 break;
10873 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10874 CompareOpc = 198;
10875 isDot = true;
10876 break;
10877 case Intrinsic::ppc_altivec_vcmpequb_p:
10878 CompareOpc = 6;
10879 isDot = true;
10880 break;
10881 case Intrinsic::ppc_altivec_vcmpequh_p:
10882 CompareOpc = 70;
10883 isDot = true;
10884 break;
10885 case Intrinsic::ppc_altivec_vcmpequw_p:
10886 CompareOpc = 134;
10887 isDot = true;
10888 break;
10889 case Intrinsic::ppc_altivec_vcmpequd_p:
10890 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10891 CompareOpc = 199;
10892 isDot = true;
10893 } else
10894 return false;
10895 break;
10896 case Intrinsic::ppc_altivec_vcmpneb_p:
10897 case Intrinsic::ppc_altivec_vcmpneh_p:
10898 case Intrinsic::ppc_altivec_vcmpnew_p:
10899 case Intrinsic::ppc_altivec_vcmpnezb_p:
10900 case Intrinsic::ppc_altivec_vcmpnezh_p:
10901 case Intrinsic::ppc_altivec_vcmpnezw_p:
10902 if (Subtarget.hasP9Altivec()) {
10903 switch (IntrinsicID) {
10904 default:
10905 llvm_unreachable("Unknown comparison intrinsic.");
10906 case Intrinsic::ppc_altivec_vcmpneb_p:
10907 CompareOpc = 7;
10908 break;
10909 case Intrinsic::ppc_altivec_vcmpneh_p:
10910 CompareOpc = 71;
10911 break;
10912 case Intrinsic::ppc_altivec_vcmpnew_p:
10913 CompareOpc = 135;
10914 break;
10915 case Intrinsic::ppc_altivec_vcmpnezb_p:
10916 CompareOpc = 263;
10917 break;
10918 case Intrinsic::ppc_altivec_vcmpnezh_p:
10919 CompareOpc = 327;
10920 break;
10921 case Intrinsic::ppc_altivec_vcmpnezw_p:
10922 CompareOpc = 391;
10923 break;
10924 }
10925 isDot = true;
10926 } else
10927 return false;
10928 break;
10929 case Intrinsic::ppc_altivec_vcmpgefp_p:
10930 CompareOpc = 454;
10931 isDot = true;
10932 break;
10933 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10934 CompareOpc = 710;
10935 isDot = true;
10936 break;
10937 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10938 CompareOpc = 774;
10939 isDot = true;
10940 break;
10941 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10942 CompareOpc = 838;
10943 isDot = true;
10944 break;
10945 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10946 CompareOpc = 902;
10947 isDot = true;
10948 break;
10949 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10950 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10951 CompareOpc = 967;
10952 isDot = true;
10953 } else
10954 return false;
10955 break;
10956 case Intrinsic::ppc_altivec_vcmpgtub_p:
10957 CompareOpc = 518;
10958 isDot = true;
10959 break;
10960 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10961 CompareOpc = 582;
10962 isDot = true;
10963 break;
10964 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10965 CompareOpc = 646;
10966 isDot = true;
10967 break;
10968 case Intrinsic::ppc_altivec_vcmpgtud_p:
10969 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10970 CompareOpc = 711;
10971 isDot = true;
10972 } else
10973 return false;
10974 break;
10975
10976 case Intrinsic::ppc_altivec_vcmpequq:
10977 case Intrinsic::ppc_altivec_vcmpgtsq:
10978 case Intrinsic::ppc_altivec_vcmpgtuq:
10979 if (!Subtarget.isISA3_1())
10980 return false;
10981 switch (IntrinsicID) {
10982 default:
10983 llvm_unreachable("Unknown comparison intrinsic.");
10984 case Intrinsic::ppc_altivec_vcmpequq:
10985 CompareOpc = 455;
10986 break;
10987 case Intrinsic::ppc_altivec_vcmpgtsq:
10988 CompareOpc = 903;
10989 break;
10990 case Intrinsic::ppc_altivec_vcmpgtuq:
10991 CompareOpc = 647;
10992 break;
10993 }
10994 break;
10995
10996 // VSX predicate comparisons use the same infrastructure
10997 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10998 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10999 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11000 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11001 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11002 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11003 if (Subtarget.hasVSX()) {
11004 switch (IntrinsicID) {
11005 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
11006 CompareOpc = 99;
11007 break;
11008 case Intrinsic::ppc_vsx_xvcmpgedp_p:
11009 CompareOpc = 115;
11010 break;
11011 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11012 CompareOpc = 107;
11013 break;
11014 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11015 CompareOpc = 67;
11016 break;
11017 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11018 CompareOpc = 83;
11019 break;
11020 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11021 CompareOpc = 75;
11022 break;
11023 }
11024 isDot = true;
11025 } else
11026 return false;
11027 break;
11028
11029 // Normal Comparisons.
11030 case Intrinsic::ppc_altivec_vcmpbfp:
11031 CompareOpc = 966;
11032 break;
11033 case Intrinsic::ppc_altivec_vcmpeqfp:
11034 CompareOpc = 198;
11035 break;
11036 case Intrinsic::ppc_altivec_vcmpequb:
11037 CompareOpc = 6;
11038 break;
11039 case Intrinsic::ppc_altivec_vcmpequh:
11040 CompareOpc = 70;
11041 break;
11042 case Intrinsic::ppc_altivec_vcmpequw:
11043 CompareOpc = 134;
11044 break;
11045 case Intrinsic::ppc_altivec_vcmpequd:
11046 if (Subtarget.hasP8Altivec())
11047 CompareOpc = 199;
11048 else
11049 return false;
11050 break;
11051 case Intrinsic::ppc_altivec_vcmpneb:
11052 case Intrinsic::ppc_altivec_vcmpneh:
11053 case Intrinsic::ppc_altivec_vcmpnew:
11054 case Intrinsic::ppc_altivec_vcmpnezb:
11055 case Intrinsic::ppc_altivec_vcmpnezh:
11056 case Intrinsic::ppc_altivec_vcmpnezw:
11057 if (Subtarget.hasP9Altivec())
11058 switch (IntrinsicID) {
11059 default:
11060 llvm_unreachable("Unknown comparison intrinsic.");
11061 case Intrinsic::ppc_altivec_vcmpneb:
11062 CompareOpc = 7;
11063 break;
11064 case Intrinsic::ppc_altivec_vcmpneh:
11065 CompareOpc = 71;
11066 break;
11067 case Intrinsic::ppc_altivec_vcmpnew:
11068 CompareOpc = 135;
11069 break;
11070 case Intrinsic::ppc_altivec_vcmpnezb:
11071 CompareOpc = 263;
11072 break;
11073 case Intrinsic::ppc_altivec_vcmpnezh:
11074 CompareOpc = 327;
11075 break;
11076 case Intrinsic::ppc_altivec_vcmpnezw:
11077 CompareOpc = 391;
11078 break;
11079 }
11080 else
11081 return false;
11082 break;
11083 case Intrinsic::ppc_altivec_vcmpgefp:
11084 CompareOpc = 454;
11085 break;
11086 case Intrinsic::ppc_altivec_vcmpgtfp:
11087 CompareOpc = 710;
11088 break;
11089 case Intrinsic::ppc_altivec_vcmpgtsb:
11090 CompareOpc = 774;
11091 break;
11092 case Intrinsic::ppc_altivec_vcmpgtsh:
11093 CompareOpc = 838;
11094 break;
11095 case Intrinsic::ppc_altivec_vcmpgtsw:
11096 CompareOpc = 902;
11097 break;
11098 case Intrinsic::ppc_altivec_vcmpgtsd:
11099 if (Subtarget.hasP8Altivec())
11100 CompareOpc = 967;
11101 else
11102 return false;
11103 break;
11104 case Intrinsic::ppc_altivec_vcmpgtub:
11105 CompareOpc = 518;
11106 break;
11107 case Intrinsic::ppc_altivec_vcmpgtuh:
11108 CompareOpc = 582;
11109 break;
11110 case Intrinsic::ppc_altivec_vcmpgtuw:
11111 CompareOpc = 646;
11112 break;
11113 case Intrinsic::ppc_altivec_vcmpgtud:
11114 if (Subtarget.hasP8Altivec())
11115 CompareOpc = 711;
11116 else
11117 return false;
11118 break;
11119 case Intrinsic::ppc_altivec_vcmpequq_p:
11120 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11121 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11122 if (!Subtarget.isISA3_1())
11123 return false;
11124 switch (IntrinsicID) {
11125 default:
11126 llvm_unreachable("Unknown comparison intrinsic.");
11127 case Intrinsic::ppc_altivec_vcmpequq_p:
11128 CompareOpc = 455;
11129 break;
11130 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11131 CompareOpc = 903;
11132 break;
11133 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11134 CompareOpc = 647;
11135 break;
11136 }
11137 isDot = true;
11138 break;
11139 }
11140 return true;
11141}
11142
11143/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11144/// lower, do it; otherwise return null.
11145SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11146 SelectionDAG &DAG) const {
11147 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11148
11149 SDLoc dl(Op);
11150
11151 switch (IntrinsicID) {
11152 case Intrinsic::thread_pointer:
11153 // Reads the thread pointer register, used for __builtin_thread_pointer.
11154 if (Subtarget.isPPC64())
11155 return DAG.getRegister(PPC::X13, MVT::i64);
11156 return DAG.getRegister(PPC::R2, MVT::i32);
11157
11158 case Intrinsic::ppc_rldimi: {
11159 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11160 SDValue Src = Op.getOperand(1);
11161 APInt Mask = Op.getConstantOperandAPInt(4);
11162 if (Mask.isZero())
11163 return Op.getOperand(2);
11164 if (Mask.isAllOnes())
11165 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11166 uint64_t SH = Op.getConstantOperandVal(3);
11167 unsigned MB = 0, ME = 0;
11168 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11169 report_fatal_error("invalid rldimi mask!");
11170 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
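    // The RLDIMI emitted below encodes its SH field as 63-ME, so Src is
    // pre-rotated here so that the combined rotation equals the requested SH.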
11171 if (ME < 63 - SH) {
11172 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11173 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11174 } else if (ME > 63 - SH) {
11175 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11176 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11177 }
11178 return SDValue(
11179 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11180 {Op.getOperand(2), Src,
11181 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11182 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11183 0);
11184 }
11185
11186 case Intrinsic::ppc_rlwimi: {
11187 APInt Mask = Op.getConstantOperandAPInt(4);
11188 if (Mask.isZero())
11189 return Op.getOperand(2);
11190 if (Mask.isAllOnes())
11191 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11192 Op.getOperand(3));
11193 unsigned MB = 0, ME = 0;
11194 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11195 report_fatal_error("invalid rlwimi mask!");
11196 return SDValue(DAG.getMachineNode(
11197 PPC::RLWIMI, dl, MVT::i32,
11198 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11199 DAG.getTargetConstant(MB, dl, MVT::i32),
11200 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11201 0);
11202 }
11203
11204 case Intrinsic::ppc_rlwnm: {
11205 if (Op.getConstantOperandVal(3) == 0)
11206 return DAG.getConstant(0, dl, MVT::i32);
11207 unsigned MB = 0, ME = 0;
11208 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11209 report_fatal_error("invalid rlwnm mask!");
11210 return SDValue(
11211 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11212 {Op.getOperand(1), Op.getOperand(2),
11213 DAG.getTargetConstant(MB, dl, MVT::i32),
11214 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11215 0);
11216 }
11217
11218 case Intrinsic::ppc_mma_disassemble_acc: {
11219 if (Subtarget.isISAFuture()) {
11220 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11221 SDValue WideVec =
11222 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11223 Op.getOperand(1)),
11224 0);
11225      SmallVector<SDValue, 4> RetOps;
11226      SDValue Value = SDValue(WideVec.getNode(), 0);
11227 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11228
11229 SDValue Extract;
11230 Extract = DAG.getNode(
11231 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11232 Subtarget.isLittleEndian() ? Value2 : Value,
11233 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11234 dl, getPointerTy(DAG.getDataLayout())));
11235 RetOps.push_back(Extract);
11236 Extract = DAG.getNode(
11237 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11238 Subtarget.isLittleEndian() ? Value2 : Value,
11239 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11240 dl, getPointerTy(DAG.getDataLayout())));
11241 RetOps.push_back(Extract);
11242 Extract = DAG.getNode(
11243 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11244 Subtarget.isLittleEndian() ? Value : Value2,
11245 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11246 dl, getPointerTy(DAG.getDataLayout())));
11247 RetOps.push_back(Extract);
11248 Extract = DAG.getNode(
11249 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11250 Subtarget.isLittleEndian() ? Value : Value2,
11251 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11252 dl, getPointerTy(DAG.getDataLayout())));
11253 RetOps.push_back(Extract);
11254 return DAG.getMergeValues(RetOps, dl);
11255 }
11256 [[fallthrough]];
11257 }
11258 case Intrinsic::ppc_vsx_disassemble_pair: {
11259 int NumVecs = 2;
11260 SDValue WideVec = Op.getOperand(1);
11261 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11262 NumVecs = 4;
11263 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11264 }
11265    SmallVector<SDValue, 4> RetOps;
11266    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11267 SDValue Extract = DAG.getNode(
11268 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11269 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11270 : VecNo,
11271 dl, getPointerTy(DAG.getDataLayout())));
11272 RetOps.push_back(Extract);
11273 }
11274 return DAG.getMergeValues(RetOps, dl);
11275 }
11276
11277 case Intrinsic::ppc_mma_build_dmr: {
11278    SmallVector<SDValue, 4> Pairs;
11279    SmallVector<SDValue, 8> Chains;
11280    for (int i = 1; i < 9; i += 2) {
11281 SDValue Hi = Op.getOperand(i);
11282 SDValue Lo = Op.getOperand(i + 1);
11283 if (Hi->getOpcode() == ISD::LOAD)
11284 Chains.push_back(Hi.getValue(1));
11285 if (Lo->getOpcode() == ISD::LOAD)
11286 Chains.push_back(Lo.getValue(1));
11287 Pairs.push_back(
11288 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11289 }
11290 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11291 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11292 return DAG.getMergeValues({Value, TF}, dl);
11293 }
11294
11295 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11296 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11297 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11298 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11299 "Specify P of 0 or 1 for lower or upper 512 bytes");
11300 unsigned HiLo = Idx->getSExtValue();
11301 unsigned Opcode;
11302 unsigned Subx;
11303 if (HiLo == 0) {
11304 Opcode = PPC::DMXXEXTFDMR512;
11305 Subx = PPC::sub_wacc_lo;
11306 } else {
11307 Opcode = PPC::DMXXEXTFDMR512_HI;
11308 Subx = PPC::sub_wacc_hi;
11309 }
11310 SDValue Subreg(
11311 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11312 Op.getOperand(1),
11313 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11314 0);
11315 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11316 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11317 }
11318
11319 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11320 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11321 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11322 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11323 "Specify a dmr row pair 0-3");
11324 unsigned IdxVal = Idx->getSExtValue();
11325 unsigned Subx;
11326 switch (IdxVal) {
11327 case 0:
11328 Subx = PPC::sub_dmrrowp0;
11329 break;
11330 case 1:
11331 Subx = PPC::sub_dmrrowp1;
11332 break;
11333 case 2:
11334 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11335 break;
11336 case 3:
11337 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11338 break;
11339 }
11340 SDValue Subreg(
11341 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11342 Op.getOperand(1),
11343 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11344 0);
11345 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11346 return SDValue(
11347 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11348 0);
11349 }
11350
11351 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11352 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11353 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11354 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11355 "Specify P of 0 or 1 for lower or upper 512 bytes");
11356 unsigned HiLo = Idx->getSExtValue();
11357 unsigned Opcode;
11358 unsigned Subx;
11359 if (HiLo == 0) {
11360 Opcode = PPC::DMXXINSTDMR512;
11361 Subx = PPC::sub_wacc_lo;
11362 } else {
11363 Opcode = PPC::DMXXINSTDMR512_HI;
11364 Subx = PPC::sub_wacc_hi;
11365 }
11366 SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
11367 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
11368 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11369 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11370 Op.getOperand(1), Wacc, SubReg),
11371 0);
11372 }
11373
11374 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11375 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11376 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11377 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11378 "Specify a dmr row pair 0-3");
11379 unsigned IdxVal = Idx->getSExtValue();
11380 unsigned Subx;
11381 switch (IdxVal) {
11382 case 0:
11383 Subx = PPC::sub_dmrrowp0;
11384 break;
11385 case 1:
11386 Subx = PPC::sub_dmrrowp1;
11387 break;
11388 case 2:
11389 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11390 break;
11391 case 3:
11392 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11393 break;
11394 }
11395 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11396 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11397 SDValue Ops[] = {Op.getOperand(2), P};
11398 SDValue DMRRowp = SDValue(
11399 DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
11400 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11401 Op.getOperand(1), DMRRowp, SubReg),
11402 0);
11403 }
11404
11405 case Intrinsic::ppc_mma_xxmfacc:
11406 case Intrinsic::ppc_mma_xxmtacc: {
11407 // Allow pre-isa-future subtargets to lower as normal.
11408 if (!Subtarget.isISAFuture())
11409 return SDValue();
11410    // The intrinsics for xxmtacc and xxmfacc take one argument of
11411    // type v512i1. For future CPUs the corresponding wacc instruction
11412    // dmxx[inst|extf]dmr512 is always generated for type v512i1, so there
11413    // is no need to produce the xxm[t|f]acc.
11414 SDValue WideVec = Op.getOperand(1);
11415 DAG.ReplaceAllUsesWith(Op, WideVec);
11416 return SDValue();
11417 }
11418
11419 case Intrinsic::ppc_unpack_longdouble: {
11420 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11421 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11422 "Argument of long double unpack must be 0 or 1!");
11423 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11424 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11425 Idx->getValueType(0)));
11426 }
11427
11428 case Intrinsic::ppc_compare_exp_lt:
11429 case Intrinsic::ppc_compare_exp_gt:
11430 case Intrinsic::ppc_compare_exp_eq:
11431 case Intrinsic::ppc_compare_exp_uo: {
11432 unsigned Pred;
11433 switch (IntrinsicID) {
11434 case Intrinsic::ppc_compare_exp_lt:
11435 Pred = PPC::PRED_LT;
11436 break;
11437 case Intrinsic::ppc_compare_exp_gt:
11438 Pred = PPC::PRED_GT;
11439 break;
11440 case Intrinsic::ppc_compare_exp_eq:
11441 Pred = PPC::PRED_EQ;
11442 break;
11443 case Intrinsic::ppc_compare_exp_uo:
11444 Pred = PPC::PRED_UN;
11445 break;
11446 }
11447 return SDValue(
11448 DAG.getMachineNode(
11449 PPC::SELECT_CC_I4, dl, MVT::i32,
11450 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11451 Op.getOperand(1), Op.getOperand(2)),
11452 0),
11453 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11454 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11455 0);
11456 }
11457 case Intrinsic::ppc_test_data_class: {
11458 EVT OpVT = Op.getOperand(1).getValueType();
11459 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11460 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11461 : PPC::XSTSTDCSP);
11462 return SDValue(
11463 DAG.getMachineNode(
11464 PPC::SELECT_CC_I4, dl, MVT::i32,
11465 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11466 Op.getOperand(1)),
11467 0),
11468 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11469 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11470 0);
11471 }
11472 case Intrinsic::ppc_fnmsub: {
11473 EVT VT = Op.getOperand(1).getValueType();
11474 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11475 return DAG.getNode(
11476 ISD::FNEG, dl, VT,
11477 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11478 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11479 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11480 Op.getOperand(2), Op.getOperand(3));
11481 }
11482 case Intrinsic::ppc_convert_f128_to_ppcf128:
11483 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11484 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11485 ? RTLIB::CONVERT_PPCF128_F128
11486 : RTLIB::CONVERT_F128_PPCF128;
11487 MakeLibCallOptions CallOptions;
11488 std::pair<SDValue, SDValue> Result =
11489 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11490 dl, SDValue());
11491 return Result.first;
11492 }
11493 case Intrinsic::ppc_maxfe:
11494 case Intrinsic::ppc_maxfl:
11495 case Intrinsic::ppc_maxfs:
11496 case Intrinsic::ppc_minfe:
11497 case Intrinsic::ppc_minfl:
11498 case Intrinsic::ppc_minfs: {
11499 EVT VT = Op.getValueType();
11500 assert(
11501 all_of(Op->ops().drop_front(4),
11502 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11503 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11504 (void)VT;
11505    ISD::CondCode CC = ISD::SETGT;
11506    if (IntrinsicID == Intrinsic::ppc_minfe ||
11507 IntrinsicID == Intrinsic::ppc_minfl ||
11508 IntrinsicID == Intrinsic::ppc_minfs)
11509 CC = ISD::SETLT;
11510 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11511 SDValue Res = Op.getOperand(I);
11512 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11513 Res =
11514 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11515 }
11516 return Res;
11517 }
11518 }
11519
11520 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11521 // opcode number of the comparison.
11522 int CompareOpc;
11523 bool isDot;
11524 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11525 return SDValue(); // Don't custom lower most intrinsics.
11526
11527 // If this is a non-dot comparison, make the VCMP node and we are done.
11528 if (!isDot) {
11529 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11530 Op.getOperand(1), Op.getOperand(2),
11531 DAG.getConstant(CompareOpc, dl, MVT::i32));
11532 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11533 }
11534
11535 // Create the PPCISD altivec 'dot' comparison node.
11536 SDValue Ops[] = {
11537 Op.getOperand(2), // LHS
11538 Op.getOperand(3), // RHS
11539 DAG.getConstant(CompareOpc, dl, MVT::i32)
11540 };
11541 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11542 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11543
11544 // Unpack the result based on how the target uses it.
11545 unsigned BitNo; // Bit # of CR6.
11546 bool InvertBit; // Invert result?
11547 unsigned Bitx;
11548 unsigned SetOp;
11549 switch (Op.getConstantOperandVal(1)) {
11550  default: // Can't happen; don't crash on an invalid number though.
11551 case 0: // Return the value of the EQ bit of CR6.
11552 BitNo = 0;
11553 InvertBit = false;
11554 Bitx = PPC::sub_eq;
11555 SetOp = PPCISD::SETBC;
11556 break;
11557 case 1: // Return the inverted value of the EQ bit of CR6.
11558 BitNo = 0;
11559 InvertBit = true;
11560 Bitx = PPC::sub_eq;
11561 SetOp = PPCISD::SETBCR;
11562 break;
11563 case 2: // Return the value of the LT bit of CR6.
11564 BitNo = 2;
11565 InvertBit = false;
11566 Bitx = PPC::sub_lt;
11567 SetOp = PPCISD::SETBC;
11568 break;
11569 case 3: // Return the inverted value of the LT bit of CR6.
11570 BitNo = 2;
11571 InvertBit = true;
11572 Bitx = PPC::sub_lt;
11573 SetOp = PPCISD::SETBCR;
11574 break;
11575 }
11576
11577 SDValue GlueOp = CompNode.getValue(1);
11578 if (Subtarget.isISA3_1()) {
11579 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11580 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11581 SDValue CRBit =
11582 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11583 CR6Reg, SubRegIdx, GlueOp),
11584 0);
11585 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11586 }
11587
11588 // Now that we have the comparison, emit a copy from the CR to a GPR.
11589 // This is flagged to the above dot comparison.
11590 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11591 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11592
11593 // Shift the bit into the low position.
11594 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11595 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11596 // Isolate the bit.
11597 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11598 DAG.getConstant(1, dl, MVT::i32));
11599
11600 // If we are supposed to, toggle the bit.
11601 if (InvertBit)
11602 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11603 DAG.getConstant(1, dl, MVT::i32));
11604 return Flags;
11605}
11606
11607SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11608 SelectionDAG &DAG) const {
11609 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11610 // the beginning of the argument list.
11611 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11612 SDLoc DL(Op);
11613 switch (Op.getConstantOperandVal(ArgStart)) {
11614 case Intrinsic::ppc_cfence: {
11615 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11616 SDValue Val = Op.getOperand(ArgStart + 1);
11617 EVT Ty = Val.getValueType();
11618 if (Ty == MVT::i128) {
11619 // FIXME: Testing one of two paired registers is sufficient to guarantee
11620 // ordering?
11621 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11622 }
11623 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11624 return SDValue(
11625 DAG.getMachineNode(
11626 Opcode, DL, MVT::Other,
11627 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11628 Op.getOperand(0)),
11629 0);
11630 }
11631 case Intrinsic::ppc_mma_disassemble_dmr: {
11632 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11633 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11634 }
11635 default:
11636 break;
11637 }
11638 return SDValue();
11639}
11640
11641// Lower scalar BSWAP64 to xxbrd.
11642SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11643 SDLoc dl(Op);
11644 if (!Subtarget.isPPC64())
11645 return Op;
11646 // MTVSRDD
11647 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11648 Op.getOperand(0));
11649 // XXBRD
11650 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11651 // MFVSRD
11652 int VectorIndex = 0;
11653 if (Subtarget.isLittleEndian())
11654 VectorIndex = 1;
11655 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11656 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11657 return Op;
11658}
11659
11660// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11661// compared to a value that is atomically loaded (atomic loads zero-extend).
11662SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11663 SelectionDAG &DAG) const {
11664 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11665 "Expecting an atomic compare-and-swap here.");
11666 SDLoc dl(Op);
11667 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11668 EVT MemVT = AtomicNode->getMemoryVT();
11669 if (MemVT.getSizeInBits() >= 32)
11670 return Op;
11671
11672 SDValue CmpOp = Op.getOperand(2);
11673 // If this is already correctly zero-extended, leave it alone.
11674 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11675 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11676 return Op;
11677
11678 // Clear the high bits of the compare operand.
11679 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11680 SDValue NewCmpOp =
11681 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11682 DAG.getConstant(MaskVal, dl, MVT::i32));
11683
11684 // Replace the existing compare operand with the properly zero-extended one.
11685  SmallVector<SDValue, 4> Ops;
11686  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11687 Ops.push_back(AtomicNode->getOperand(i));
11688 Ops[2] = NewCmpOp;
11689 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11690 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11691 auto NodeTy =
11692      (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11693  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11694}
11695
11696SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11697 SelectionDAG &DAG) const {
11698 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11699 EVT MemVT = N->getMemoryVT();
11700 assert(MemVT.getSimpleVT() == MVT::i128 &&
11701 "Expect quadword atomic operations");
11702 SDLoc dl(N);
11703 unsigned Opc = N->getOpcode();
11704 switch (Opc) {
11705 case ISD::ATOMIC_LOAD: {
11706    // Lower a quadword atomic load to int_ppc_atomic_load_i128, which is then
11707    // lowered to PPC instructions by the pattern-matching instruction selector.
11708 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11709    SmallVector<SDValue, 4> Ops{
11710        N->getOperand(0),
11711 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11712 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11713 Ops.push_back(N->getOperand(I));
11714 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11715 Ops, MemVT, N->getMemOperand());
11716 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11717 SDValue ValHi =
11718 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11719 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11720 DAG.getConstant(64, dl, MVT::i32));
11721 SDValue Val =
11722 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11723 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11724 {Val, LoadedVal.getValue(2)});
11725 }
11726 case ISD::ATOMIC_STORE: {
11727    // Lower a quadword atomic store to int_ppc_atomic_store_i128, which is then
11728    // lowered to PPC instructions by the pattern-matching instruction selector.
11729 SDVTList Tys = DAG.getVTList(MVT::Other);
11730    SmallVector<SDValue, 4> Ops{
11731        N->getOperand(0),
11732 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11733 SDValue Val = N->getOperand(1);
11734 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11735 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11736 DAG.getConstant(64, dl, MVT::i32));
11737 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11738 Ops.push_back(ValLo);
11739 Ops.push_back(ValHi);
11740 Ops.push_back(N->getOperand(2));
11741 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11742 N->getMemOperand());
11743 }
11744 default:
11745 llvm_unreachable("Unexpected atomic opcode");
11746 }
11747}
11748
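// Build a test-data-class (xststdc[sp|dp|qp]) based check of Op against the
// given FPClassTest mask, returning an i1 result. Flag combinations the
// instruction cannot test directly are composed from simpler tests.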
11749static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11750                                 SelectionDAG &DAG,
11751 const PPCSubtarget &Subtarget) {
11752 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11753
11754 enum DataClassMask {
11755 DC_NAN = 1 << 6,
11756 DC_NEG_INF = 1 << 4,
11757 DC_POS_INF = 1 << 5,
11758 DC_NEG_ZERO = 1 << 2,
11759 DC_POS_ZERO = 1 << 3,
11760 DC_NEG_SUBNORM = 1,
11761 DC_POS_SUBNORM = 1 << 1,
11762 };
11763
11764 EVT VT = Op.getValueType();
11765
11766 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11767 : VT == MVT::f64 ? PPC::XSTSTDCDP
11768 : PPC::XSTSTDCSP;
11769
11770 if (Mask == fcAllFlags)
11771 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11772 if (Mask == 0)
11773 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11774
11775  // Test the inverted (reverse) set of flags when that is cheaper or necessary.
11776 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11777 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11778 return DAG.getNOT(Dl, Rev, MVT::i1);
11779 }
11780
11781  // Power doesn't support testing whether a value is 'normal'. Test the rest
11782  // first, then test whether it is 'not not-normal' with the expected sign.
11783 if (Mask & fcNormal) {
11784 SDValue Rev(DAG.getMachineNode(
11785 TestOp, Dl, MVT::i32,
11786 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11787 DC_NEG_ZERO | DC_POS_ZERO |
11788 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11789 Dl, MVT::i32),
11790 Op),
11791 0);
11792    // The sign is stored in CR bit 0; the result is in CR bit 2.
11793 SDValue Sign(
11794 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11795 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11796 0);
11797 SDValue Normal(DAG.getNOT(
11798 Dl,
11799        SDValue(DAG.getMachineNode(
11800             TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11801 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11802 0),
11803 MVT::i1));
11804 if (Mask & fcPosNormal)
11805 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11806 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11807 if (Mask == fcPosNormal || Mask == fcNegNormal)
11808 return Result;
11809
11810 return DAG.getNode(
11811 ISD::OR, Dl, MVT::i1,
11812 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11813 }
11814
11815  // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11816  // the rest first, then test whether it 'is NaN and is signaling/quiet'.
11817 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11818 bool IsQuiet = Mask & fcQNan;
11819 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11820
11821    // Quietness is determined by the first bit in the fraction field.
11822 uint64_t QuietMask = 0;
11823 SDValue HighWord;
11824 if (VT == MVT::f128) {
11825 HighWord = DAG.getNode(
11826 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11827 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11828 QuietMask = 0x8000;
11829 } else if (VT == MVT::f64) {
11830 if (Subtarget.isPPC64()) {
11831 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11832 DAG.getBitcast(MVT::i64, Op),
11833 DAG.getConstant(1, Dl, MVT::i32));
11834 } else {
11835 SDValue Vec = DAG.getBitcast(
11836 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11837 HighWord = DAG.getNode(
11838 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11839 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11840 }
11841 QuietMask = 0x80000;
11842 } else if (VT == MVT::f32) {
11843 HighWord = DAG.getBitcast(MVT::i32, Op);
11844 QuietMask = 0x400000;
11845 }
11846 SDValue NanRes = DAG.getSetCC(
11847 Dl, MVT::i1,
11848 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11849 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11850 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11851 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11852 if (Mask == fcQNan || Mask == fcSNan)
11853 return NanRes;
11854
11855 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11856 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11857 NanRes);
11858 }
11859
11860 unsigned NativeMask = 0;
11861 if ((Mask & fcNan) == fcNan)
11862 NativeMask |= DC_NAN;
11863 if (Mask & fcNegInf)
11864 NativeMask |= DC_NEG_INF;
11865 if (Mask & fcPosInf)
11866 NativeMask |= DC_POS_INF;
11867 if (Mask & fcNegZero)
11868 NativeMask |= DC_NEG_ZERO;
11869 if (Mask & fcPosZero)
11870 NativeMask |= DC_POS_ZERO;
11871 if (Mask & fcNegSubnormal)
11872 NativeMask |= DC_NEG_SUBNORM;
11873 if (Mask & fcPosSubnormal)
11874 NativeMask |= DC_POS_SUBNORM;
11875 return SDValue(
11876 DAG.getMachineNode(
11877 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11878          SDValue(DAG.getMachineNode(
11879              TestOp, Dl, MVT::i32,
11880 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11881 0),
11882 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11883 0);
11884}
11885
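// Lower IS_FPCLASS using the P9 test-data-class facilities; for ppcf128 the
// class is determined by the high-order double.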
11886SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11887 SelectionDAG &DAG) const {
11888 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11889 SDValue LHS = Op.getOperand(0);
11890 uint64_t RHSC = Op.getConstantOperandVal(1);
11891 SDLoc Dl(Op);
11892 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11893 if (LHS.getValueType() == MVT::ppcf128) {
11894 // The higher part determines the value class.
11895 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11896 DAG.getConstant(1, Dl, MVT::i32));
11897 }
11898
11899 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11900}
11901
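// Lower SCALAR_TO_VECTOR by splatting small constants directly, turning a
// reusable i32 load into a load-and-splat, or bouncing the value through a
// 16-byte aligned stack slot.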
11902SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11903 SelectionDAG &DAG) const {
11904 SDLoc dl(Op);
11905
11906  MachineFunction &MF = DAG.getMachineFunction();
11907  SDValue Op0 = Op.getOperand(0);
11908 EVT ValVT = Op0.getValueType();
11909 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11910 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11911 int64_t IntVal = Op.getConstantOperandVal(0);
11912 if (IntVal >= -16 && IntVal <= 15)
11913 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11914 dl);
11915 }
11916
11917 ReuseLoadInfo RLI;
11918 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11919 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11920 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11921 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11922
11923 MachineMemOperand *MMO =
11924        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11925                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11926 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11927    SDValue Bits = DAG.getMemIntrinsicNode(
11928        PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11929 MVT::i32, MMO);
11930 if (RLI.ResChain)
11931 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11932 return Bits.getValue(0);
11933 }
11934
11935 // Create a stack slot that is 16-byte aligned.
11936 MachineFrameInfo &MFI = MF.getFrameInfo();
11937 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11938 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11939 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11940
11941 SDValue Val = Op0;
11942 // P10 hardware store forwarding requires that a single store contains all
11943 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11944 // to avoid load hit store on P10 when running binaries compiled for older
11945 // processors by generating two mergeable scalar stores to forward with the
11946 // vector load.
11947 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11948 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11949 ValVT.getSizeInBits() <= 64) {
11950 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11951 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11952 SDValue ShiftBy = DAG.getConstant(
11953 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11954 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11955 SDValue Plus8 =
11956 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11957 SDValue Store2 =
11958 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11959 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11960 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11961                       MachinePointerInfo());
11962  }
11963
11964 // Store the input value into Value#0 of the stack slot.
11965 SDValue Store =
11966 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11967 // Load it out.
11968 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11969}
11970
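// Custom lowering for INSERT_VECTOR_ELT: keep cases the subtarget handles
// natively (P10 handles all constant and variable indices), use an integer
// insert for an f32 load on P9 and up, and emit MTVSRZ + VECINSERT for
// constant-index byte and halfword inserts.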
11971SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11972 SelectionDAG &DAG) const {
11973 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11974 "Should only be called for ISD::INSERT_VECTOR_ELT");
11975
11976 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11977
11978 EVT VT = Op.getValueType();
11979 SDLoc dl(Op);
11980 SDValue V1 = Op.getOperand(0);
11981 SDValue V2 = Op.getOperand(1);
11982
11983 if (VT == MVT::v2f64 && C)
11984 return Op;
11985
11986 if (Subtarget.hasP9Vector()) {
11987    // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11988 // because on P10, it allows this specific insert_vector_elt load pattern to
11989 // utilize the refactored load and store infrastructure in order to exploit
11990 // prefixed loads.
11991 // On targets with inexpensive direct moves (Power9 and up), a
11992 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11993 // load since a single precision load will involve conversion to double
11994 // precision on the load followed by another conversion to single precision.
11995 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11996 (isa<LoadSDNode>(V2))) {
11997 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11998 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11999 SDValue InsVecElt =
12000 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12001 BitcastLoad, Op.getOperand(2));
12002 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12003 }
12004 }
12005
12006 if (Subtarget.isISA3_1()) {
12007 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12008 return SDValue();
12009 // On P10, we have legal lowering for constant and variable indices for
12010 // all vectors.
12011 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12012 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12013 return Op;
12014 }
12015
12016 // Before P10, we have legal lowering for constant indices but not for
12017 // variable ones.
12018 if (!C)
12019 return SDValue();
12020
12021 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12022 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12023 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12024 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12025 unsigned InsertAtElement = C->getZExtValue();
12026 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12027 if (Subtarget.isLittleEndian()) {
12028 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12029 }
12030 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12031 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12032 }
12033 return Op;
12034}
12035
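// Lower loads of the Dense Math types v1024i1 (a dmr register) and v2048i1
// (a dmr pair): emit a series of lxvp loads and assemble them with
// DMXXINSTDMR512[_HI] and REG_SEQUENCE.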
12036SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12037 SelectionDAG &DAG) const {
12038 SDLoc dl(Op);
12039 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12040 SDValue LoadChain = LN->getChain();
12041 SDValue BasePtr = LN->getBasePtr();
12042 EVT VT = Op.getValueType();
12043 bool IsV1024i1 = VT == MVT::v1024i1;
12044 bool IsV2048i1 = VT == MVT::v2048i1;
12045
12046 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12047 // Dense Math dmr pair registers, respectively.
12048 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12049 (void)IsV2048i1;
12050 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12051 "Dense Math support required.");
12052 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12053
12054  SmallVector<SDValue, 8> Loads;
12055 SmallVector<SDValue, 8> LoadChains;
12056
12057 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12058 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12059 MachineMemOperand *MMO = LN->getMemOperand();
12060 unsigned NumVecs = VT.getSizeInBits() / 256;
12061 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12062 MachineMemOperand *NewMMO =
12063 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12064 if (Idx > 0) {
12065 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12066 DAG.getConstant(32, dl, BasePtr.getValueType()));
12067 LoadOps[2] = BasePtr;
12068 }
12069    SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12070                             DAG.getVTList(MVT::v256i1, MVT::Other),
12071 LoadOps, MVT::v256i1, NewMMO);
12072 LoadChains.push_back(Ld.getValue(1));
12073 Loads.push_back(Ld);
12074 }
12075
12076 if (Subtarget.isLittleEndian()) {
12077 std::reverse(Loads.begin(), Loads.end());
12078 std::reverse(LoadChains.begin(), LoadChains.end());
12079 }
12080
12081 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12082 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[0],
12083 Loads[1]),
12084 0);
12085 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12086 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12087 Loads[2], Loads[3]),
12088 0);
12089 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12090 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12091 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
12092
12093 SDValue Value =
12094 SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12095
12096 if (IsV1024i1) {
12097 return DAG.getMergeValues({Value, TF}, dl);
12098 }
12099
12100 // Handle Loads for V2048i1 which represents a dmr pair.
12101 SDValue DmrPValue;
12102 SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
12103 Loads[4], Loads[5]),
12104 0);
12105 SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12106 Loads[6], Loads[7]),
12107 0);
12108 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
12109 SDValue Dmr1Value = SDValue(
12110 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
12111
12112 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12113 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12114
12115 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12116 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12117
12118 DmrPValue = SDValue(
12119 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12120
12121 return DAG.getMergeValues({DmrPValue, TF}, dl);
12122}
12123
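// Assemble four v256i1 register pairs into a single v1024i1 dmr value.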
12124SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12125 const SDLoc &dl,
12126 SelectionDAG &DAG) const {
12127 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
12128 Pairs[1]),
12129 0);
12130 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12131 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12132 Pairs[2], Pairs[3]),
12133 0);
12134 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12135 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12136
12137 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12138 {RC, Lo, LoSub, Hi, HiSub}),
12139 0);
12140}
12141
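// Custom lowering for loads of the MMA types v256i1 (pair) and v512i1
// (accumulator); Dense Math types are handled by LowerDMFVectorLoad.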
12142SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12143 SelectionDAG &DAG) const {
12144 SDLoc dl(Op);
12145 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12146 SDValue LoadChain = LN->getChain();
12147 SDValue BasePtr = LN->getBasePtr();
12148 EVT VT = Op.getValueType();
12149
12150 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12151 return LowerDMFVectorLoad(Op, DAG);
12152
12153 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12154 return Op;
12155
12156 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12157 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
12158 // 2 or 4 vsx registers.
12159 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12160 "Type unsupported without MMA");
12161 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12162 "Type unsupported without paired vector support");
12163 Align Alignment = LN->getAlign();
12164  SmallVector<SDValue, 4> Loads;
12165  SmallVector<SDValue, 4> LoadChains;
12166 unsigned NumVecs = VT.getSizeInBits() / 128;
12167 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12168 SDValue Load =
12169 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12170 LN->getPointerInfo().getWithOffset(Idx * 16),
12171 commonAlignment(Alignment, Idx * 16),
12172 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12173 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12174 DAG.getConstant(16, dl, BasePtr.getValueType()));
12175 Loads.push_back(Load);
12176 LoadChains.push_back(Load.getValue(1));
12177 }
12178 if (Subtarget.isLittleEndian()) {
12179 std::reverse(Loads.begin(), Loads.end());
12180 std::reverse(LoadChains.begin(), LoadChains.end());
12181 }
12182 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12183 SDValue Value =
12184 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12185 dl, VT, Loads);
12186 SDValue RetOps[] = {Value, TF};
12187 return DAG.getMergeValues(RetOps, dl);
12188}
12189
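// Lower stores of v1024i1/v2048i1 Dense Math values: extract the 512-bit
// halves with DMXXEXTFDMR512[_HI] and emit one stxvp store per 256-bit piece.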
12190SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12191 SelectionDAG &DAG) const {
12192
12193 SDLoc dl(Op);
12194 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12195 SDValue StoreChain = SN->getChain();
12196 SDValue BasePtr = SN->getBasePtr();
12197  SmallVector<SDValue, 4> Values;
12198  SmallVector<SDValue, 4> Stores;
12199  EVT VT = SN->getValue().getValueType();
12200 bool IsV1024i1 = VT == MVT::v1024i1;
12201 bool IsV2048i1 = VT == MVT::v2048i1;
12202
12203 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12204 // Dense Math dmr pair registers, respectively.
12205 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12206 (void)IsV2048i1;
12207 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12208 "Dense Math support required.");
12209 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12210
12211 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12212 if (IsV1024i1) {
12213    SDValue Lo(DAG.getMachineNode(
12214                   TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12215 Op.getOperand(1),
12216 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12217 0);
12218    SDValue Hi(DAG.getMachineNode(
12219                   TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12220 Op.getOperand(1),
12221 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12222 0);
12223 MachineSDNode *ExtNode =
12224 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12225 Values.push_back(SDValue(ExtNode, 0));
12226 Values.push_back(SDValue(ExtNode, 1));
12227 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12228 Values.push_back(SDValue(ExtNode, 0));
12229 Values.push_back(SDValue(ExtNode, 1));
12230 } else {
12231 // This corresponds to v2048i1 which represents a dmr pair.
12232 SDValue Dmr0(
12233 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12234 Op.getOperand(1),
12235 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12236 0);
12237
12238 SDValue Dmr1(
12239 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12240 Op.getOperand(1),
12241 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12242 0);
12243
12244 SDValue Dmr0Lo(DAG.getMachineNode(
12245 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12246 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12247 0);
12248
12249 SDValue Dmr0Hi(DAG.getMachineNode(
12250 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12251 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12252 0);
12253
12254 SDValue Dmr1Lo(DAG.getMachineNode(
12255 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12256 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12257 0);
12258
12259 SDValue Dmr1Hi(DAG.getMachineNode(
12260 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12261 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12262 0);
12263
12264 MachineSDNode *ExtNode =
12265 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12266 Values.push_back(SDValue(ExtNode, 0));
12267 Values.push_back(SDValue(ExtNode, 1));
12268 ExtNode =
12269 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12270 Values.push_back(SDValue(ExtNode, 0));
12271 Values.push_back(SDValue(ExtNode, 1));
12272 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12273 Values.push_back(SDValue(ExtNode, 0));
12274 Values.push_back(SDValue(ExtNode, 1));
12275 ExtNode =
12276 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12277 Values.push_back(SDValue(ExtNode, 0));
12278 Values.push_back(SDValue(ExtNode, 1));
12279 }
12280
12281 if (Subtarget.isLittleEndian())
12282 std::reverse(Values.begin(), Values.end());
12283
12284 SDVTList Tys = DAG.getVTList(MVT::Other);
12285  SmallVector<SDValue, 4> Ops{
12286      StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12287 Values[0], BasePtr};
12288 MachineMemOperand *MMO = SN->getMemOperand();
12289 unsigned NumVecs = VT.getSizeInBits() / 256;
12290 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12291 MachineMemOperand *NewMMO =
12292 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12293 if (Idx > 0) {
12294 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12295 DAG.getConstant(32, dl, BasePtr.getValueType()));
12296 Ops[3] = BasePtr;
12297 }
12298 Ops[2] = Values[Idx];
12299 SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12300 MVT::v256i1, NewMMO);
12301 Stores.push_back(St);
12302 }
12303
12304 SDValue TF = DAG.getTokenFactor(dl, Stores);
12305 return TF;
12306}
12307
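// Custom lowering for stores of the MMA types v256i1 (pair) and v512i1
// (accumulator); Dense Math types are handled by LowerDMFVectorStore.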
12308SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12309 SelectionDAG &DAG) const {
12310 SDLoc dl(Op);
12311 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12312 SDValue StoreChain = SN->getChain();
12313 SDValue BasePtr = SN->getBasePtr();
12314 SDValue Value = SN->getValue();
12315 SDValue Value2 = SN->getValue();
12316 EVT StoreVT = Value.getValueType();
12317
12318 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12319 return LowerDMFVectorStore(Op, DAG);
12320
12321 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12322 return Op;
12323
12324 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12325  // Here we create 2 or 4 v16i8 stores to store the pair's or accumulator's
12326  // underlying registers individually.
12327 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12328 "Type unsupported without MMA");
12329 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12330 "Type unsupported without paired vector support");
12331 Align Alignment = SN->getAlign();
12332  SmallVector<SDValue, 4> Stores;
12333  unsigned NumVecs = 2;
12334 if (StoreVT == MVT::v512i1) {
12335 if (Subtarget.isISAFuture()) {
12336 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12337 MachineSDNode *ExtNode = DAG.getMachineNode(
12338 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12339
12340 Value = SDValue(ExtNode, 0);
12341 Value2 = SDValue(ExtNode, 1);
12342 } else
12343 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12344 NumVecs = 4;
12345 }
12346 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12347 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12348 SDValue Elt;
12349 if (Subtarget.isISAFuture()) {
12350 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12351 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12352 Idx > 1 ? Value2 : Value,
12353 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12354 } else
12355 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12356 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12357
12358 SDValue Store =
12359 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12360 SN->getPointerInfo().getWithOffset(Idx * 16),
12361 commonAlignment(Alignment, Idx * 16),
12362 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12363 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12364 DAG.getConstant(16, dl, BasePtr.getValueType()));
12365 Stores.push_back(Store);
12366 }
12367 SDValue TF = DAG.getTokenFactor(dl, Stores);
12368 return TF;
12369}
12370
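// Custom lowering for vector multiply: v4i32 is composed from halfword
// multiplies and multiply-sums, v16i8 from even/odd byte multiplies merged
// with a shuffle.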
12371SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12372 SDLoc dl(Op);
12373 if (Op.getValueType() == MVT::v4i32) {
12374 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12375
12376 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12377 // +16 as shift amt.
12378 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12379 SDValue RHSSwap = // = vrlw RHS, 16
12380 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12381
12382 // Shrinkify inputs to v8i16.
12383 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12384 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12385 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12386
12387 // Low parts multiplied together, generating 32-bit results (we ignore the
12388 // top parts).
12389 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12390 LHS, RHS, DAG, dl, MVT::v4i32);
12391
12392 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12393 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12394 // Shift the high parts up 16 bits.
12395 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12396 Neg16, DAG, dl);
12397 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12398 } else if (Op.getValueType() == MVT::v16i8) {
12399 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12400 bool isLittleEndian = Subtarget.isLittleEndian();
12401
12402 // Multiply the even 8-bit parts, producing 16-bit sums.
12403 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12404 LHS, RHS, DAG, dl, MVT::v8i16);
12405 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12406
12407 // Multiply the odd 8-bit parts, producing 16-bit sums.
12408 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12409 LHS, RHS, DAG, dl, MVT::v8i16);
12410 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12411
12412 // Merge the results together. Because vmuleub and vmuloub are
12413 // instructions with a big-endian bias, we must reverse the
12414 // element numbering and reverse the meaning of "odd" and "even"
12415 // when generating little endian code.
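    // In big-endian mode index 2*i+1 selects the low-order byte of the i-th
    // 16-bit product, so interleaving the even and odd products yields all
    // 16 byte-wise results; little endian mirrors the mask and operand order.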
12416 int Ops[16];
12417 for (unsigned i = 0; i != 8; ++i) {
12418 if (isLittleEndian) {
12419 Ops[i*2 ] = 2*i;
12420 Ops[i*2+1] = 2*i+16;
12421 } else {
12422 Ops[i*2 ] = 2*i+1;
12423 Ops[i*2+1] = 2*i+1+16;
12424 }
12425 }
12426 if (isLittleEndian)
12427 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12428 else
12429 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12430 } else {
12431 llvm_unreachable("Unknown mul to lower!");
12432 }
12433}
12434
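// FP_ROUND from an f128 source requires P9 vector support; without it, defer
// to the default expansion.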
12435SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12436 bool IsStrict = Op->isStrictFPOpcode();
12437 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12438 !Subtarget.hasP9Vector())
12439 return SDValue();
12440
12441 return Op;
12442}
12443
12444// Custom lowering for fpext v2f32 to v2f64
12445SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12446
12447 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12448 "Should only be called for ISD::FP_EXTEND");
12449
12450 // FIXME: handle extends from half precision float vectors on P9.
12451 // We only want to custom lower an extend from v2f32 to v2f64.
12452 if (Op.getValueType() != MVT::v2f64 ||
12453 Op.getOperand(0).getValueType() != MVT::v2f32)
12454 return SDValue();
12455
12456 SDLoc dl(Op);
12457 SDValue Op0 = Op.getOperand(0);
12458
12459 switch (Op0.getOpcode()) {
12460 default:
12461 return SDValue();
12462  case ISD::EXTRACT_SUBVECTOR: {
12463    assert(Op0.getNumOperands() == 2 &&
12464 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12465 "Node should have 2 operands with second one being a constant!");
12466
12467 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12468 return SDValue();
12469
12470 // Custom lower is only done for high or low doubleword.
12471 int Idx = Op0.getConstantOperandVal(1);
12472 if (Idx % 2 != 0)
12473 return SDValue();
12474
12475 // Since input is v4f32, at this point Idx is either 0 or 2.
12476 // Shift to get the doubleword position we want.
12477 int DWord = Idx >> 1;
12478
12479 // High and low word positions are different on little endian.
12480 if (Subtarget.isLittleEndian())
12481 DWord ^= 0x1;
12482
12483 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12484 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12485 }
12486 case ISD::FADD:
12487 case ISD::FMUL:
12488 case ISD::FSUB: {
12489 SDValue NewLoad[2];
12490 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12491      // Ensure both inputs are loads.
12492 SDValue LdOp = Op0.getOperand(i);
12493 if (LdOp.getOpcode() != ISD::LOAD)
12494 return SDValue();
12495 // Generate new load node.
12496 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12497 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12498 NewLoad[i] = DAG.getMemIntrinsicNode(
12499 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12500 LD->getMemoryVT(), LD->getMemOperand());
12501 }
12502 SDValue NewOp =
12503 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12504 NewLoad[1], Op0.getNode()->getFlags());
12505 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12506 DAG.getConstant(0, dl, MVT::i32));
12507 }
12508 case ISD::LOAD: {
12509 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12510 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12511 SDValue NewLd = DAG.getMemIntrinsicNode(
12512 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12513 LD->getMemoryVT(), LD->getMemOperand());
12514 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12515 DAG.getConstant(0, dl, MVT::i32));
12516 }
12517 }
12518 llvm_unreachable("ERROR: Should return for all cases within switch.");
12519}
12520
12521static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12522 SelectionDAG &DAG,
12523 const PPCSubtarget &STI) {
12524 SDLoc DL(Value);
12525 if (STI.useCRBits())
12526 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12527 DAG.getConstant(1, DL, SumType),
12528 DAG.getConstant(0, DL, SumType));
12529 else
12530 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
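 // Adding all-ones (-1) to a value known to be 0 or 1 produces a carry-out
 // exactly when the value is 1, which materializes the boolean into the carry
 // flag returned as the second result of ADDC.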
12531 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12532 Value, DAG.getAllOnesConstant(DL, SumType));
12533 return Sum.getValue(1);
12534}
12535
12536static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12537 EVT CarryType, SelectionDAG &DAG,
12538 const PPCSubtarget &STI) {
12539 SDLoc DL(Flag);
12540 SDValue Zero = DAG.getConstant(0, DL, SumType);
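 // ADDE of 0 + 0 + carry-in simply copies the incoming carry bit into the sum,
 // turning the carry flag back into a 0-or-1 integer value.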
12541 SDValue Carry = DAG.getNode(
12542 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12543 if (STI.useCRBits())
12544 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12545 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12546}
12547
12548SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12549
12550 SDLoc DL(Op);
12551 SDNode *N = Op.getNode();
12552 EVT VT = N->getValueType(0);
12553 EVT CarryType = N->getValueType(1);
12554 unsigned Opc = N->getOpcode();
12555 bool IsAdd = Opc == ISD::UADDO;
12556 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12557 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12558 N->getOperand(0), N->getOperand(1));
12559 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12560 DAG, Subtarget);
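 // PowerPC subtraction sets the carry bit (CA) when there is no borrow, so
 // invert it here to produce the borrow-style carry expected by USUBO.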
12561 if (!IsAdd)
12562 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12563 DAG.getConstant(1UL, DL, CarryType));
12564 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12565}
12566
12567SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12568 SelectionDAG &DAG) const {
12569 SDLoc DL(Op);
12570 SDNode *N = Op.getNode();
12571 unsigned Opc = N->getOpcode();
12572 EVT VT = N->getValueType(0);
12573 EVT CarryType = N->getValueType(1);
12574 SDValue CarryOp = N->getOperand(2);
12575 bool IsAdd = Opc == ISD::UADDO_CARRY;
12576 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12577 if (!IsAdd)
12578 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12579 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12580 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12581 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12582 Op.getOperand(0), Op.getOperand(1), CarryOp);
12583 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12584 Subtarget);
12585 if (!IsAdd)
12586 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12587 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12588 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12589}
12590
12591SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12592
12593 SDLoc dl(Op);
12594 SDValue LHS = Op.getOperand(0);
12595 SDValue RHS = Op.getOperand(1);
12596 EVT VT = Op.getNode()->getValueType(0);
12597
12598 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12599
12600 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12601 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12602
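 // Signed overflow for LHS - RHS occurs exactly when the operands have
 // different signs and the result's sign differs from LHS, i.e. when the sign
 // bit of (LHS ^ RHS) & (Sub ^ LHS) is set.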
12603 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12604
12605 SDValue Overflow =
12606 DAG.getNode(ISD::SRL, dl, VT, And,
12607 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12608
12609 SDValue OverflowTrunc =
12610 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12611
12612 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12613}
12614
12615/// LowerOperation - Provide custom lowering hooks for some operations.
12616///
12617SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12618 switch (Op.getOpcode()) {
12619 default:
12620 llvm_unreachable("Wasn't expecting to be able to lower this!");
12621 case ISD::FPOW: return lowerPow(Op, DAG);
12622 case ISD::FSIN: return lowerSin(Op, DAG);
12623 case ISD::FCOS: return lowerCos(Op, DAG);
12624 case ISD::FLOG: return lowerLog(Op, DAG);
12625 case ISD::FLOG10: return lowerLog10(Op, DAG);
12626 case ISD::FEXP: return lowerExp(Op, DAG);
12627 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12628 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12629 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12630 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12631 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12632 case ISD::STRICT_FSETCC:
12633 case ISD::STRICT_FSETCCS:
12634 case ISD::SETCC: return LowerSETCC(Op, DAG);
12635 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12636 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12637 case ISD::SSUBO:
12638 return LowerSSUBO(Op, DAG);
12639
12640 case ISD::INLINEASM:
12641 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12642 // Variable argument lowering.
12643 case ISD::VASTART: return LowerVASTART(Op, DAG);
12644 case ISD::VAARG: return LowerVAARG(Op, DAG);
12645 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12646
12647 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12648 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12649 case ISD::GET_DYNAMIC_AREA_OFFSET:
12650 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12651
12652 // Exception handling lowering.
12653 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12654 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12655 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12656
12657 case ISD::LOAD: return LowerLOAD(Op, DAG);
12658 case ISD::STORE: return LowerSTORE(Op, DAG);
12659 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12660 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12661 case ISD::STRICT_FP_TO_UINT:
12662 case ISD::STRICT_FP_TO_SINT:
12663 case ISD::FP_TO_UINT:
12664 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12665 case ISD::STRICT_UINT_TO_FP:
12666 case ISD::STRICT_SINT_TO_FP:
12667 case ISD::UINT_TO_FP:
12668 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12669 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12670 case ISD::SET_ROUNDING:
12671 return LowerSET_ROUNDING(Op, DAG);
12672
12673 // Lower 64-bit shifts.
12674 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12675 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12676 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12677
12678 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12679 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12680
12681 // Vector-related lowering.
12682 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12683 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12684 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12685 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12686 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12687 case ISD::MUL: return LowerMUL(Op, DAG);
12688 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12689 case ISD::STRICT_FP_ROUND:
12690 case ISD::FP_ROUND:
12691 return LowerFP_ROUND(Op, DAG);
12692 case ISD::ROTL: return LowerROTL(Op, DAG);
12693
12694 // For counter-based loop handling.
12695 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12696
12697 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12698
12699 // Frame & Return address.
12700 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12701 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12702
12703 case ISD::INTRINSIC_VOID:
12704 return LowerINTRINSIC_VOID(Op, DAG);
12705 case ISD::BSWAP:
12706 return LowerBSWAP(Op, DAG);
12707 case ISD::ATOMIC_CMP_SWAP:
12708 return LowerATOMIC_CMP_SWAP(Op, DAG);
12709 case ISD::ATOMIC_STORE:
12710 return LowerATOMIC_LOAD_STORE(Op, DAG);
12711 case ISD::IS_FPCLASS:
12712 return LowerIS_FPCLASS(Op, DAG);
12713 case ISD::UADDO:
12714 case ISD::USUBO:
12715 return LowerADDSUBO(Op, DAG);
12716 case ISD::UADDO_CARRY:
12717 case ISD::USUBO_CARRY:
12718 return LowerADDSUBO_CARRY(Op, DAG);
12719 }
12720}
12721
12722void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12723 SmallVectorImpl<SDValue> &Results,
12724 SelectionDAG &DAG) const {
12725 SDLoc dl(N);
12726 switch (N->getOpcode()) {
12727 default:
12728 llvm_unreachable("Do not know how to custom type legalize this operation!");
12729 case ISD::ATOMIC_LOAD: {
12730 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12731 Results.push_back(Res);
12732 Results.push_back(Res.getValue(1));
12733 break;
12734 }
12735 case ISD::READCYCLECOUNTER: {
12736 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12737 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12738
12739 Results.push_back(
12740 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12741 Results.push_back(RTB.getValue(2));
12742 break;
12743 }
12744 case ISD::INTRINSIC_W_CHAIN: {
12745 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12746 break;
12747
12748 assert(N->getValueType(0) == MVT::i1 &&
12749 "Unexpected result type for CTR decrement intrinsic");
12750 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12751 N->getValueType(0));
12752 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12753 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12754 N->getOperand(1));
12755
12756 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12757 Results.push_back(NewInt.getValue(1));
12758 break;
12759 }
12760 case ISD::INTRINSIC_WO_CHAIN: {
12761 switch (N->getConstantOperandVal(0)) {
12762 case Intrinsic::ppc_pack_longdouble:
12763 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12764 N->getOperand(2), N->getOperand(1)));
12765 break;
12766 case Intrinsic::ppc_maxfe:
12767 case Intrinsic::ppc_minfe:
12768 case Intrinsic::ppc_fnmsub:
12769 case Intrinsic::ppc_convert_f128_to_ppcf128:
12770 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12771 break;
12772 }
12773 break;
12774 }
12775 case ISD::VAARG: {
12776 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12777 return;
12778
12779 EVT VT = N->getValueType(0);
12780
12781 if (VT == MVT::i64) {
12782 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12783
12784 Results.push_back(NewNode);
12785 Results.push_back(NewNode.getValue(1));
12786 }
12787 return;
12788 }
12789 case ISD::STRICT_FP_TO_SINT:
12790 case ISD::STRICT_FP_TO_UINT:
12791 case ISD::FP_TO_SINT:
12792 case ISD::FP_TO_UINT: {
12793 // LowerFP_TO_INT() can only handle f32 and f64.
12794 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12795 MVT::ppcf128)
12796 return;
12797 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12798 Results.push_back(LoweredValue);
12799 if (N->isStrictFPOpcode())
12800 Results.push_back(LoweredValue.getValue(1));
12801 return;
12802 }
12803 case ISD::TRUNCATE: {
12804 if (!N->getValueType(0).isVector())
12805 return;
12806 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12807 if (Lowered)
12808 Results.push_back(Lowered);
12809 return;
12810 }
12811 case ISD::SCALAR_TO_VECTOR: {
12812 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12813 if (Lowered)
12814 Results.push_back(Lowered);
12815 return;
12816 }
12817 case ISD::FSHL:
12818 case ISD::FSHR:
12819 // Don't handle funnel shifts here.
12820 return;
12821 case ISD::BITCAST:
12822 // Don't handle bitcast here.
12823 return;
12824 case ISD::FP_EXTEND:
12825 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12826 if (Lowered)
12827 Results.push_back(Lowered);
12828 return;
12829 }
12830}
12831
12832//===----------------------------------------------------------------------===//
12833// Other Lowering Code
12834//===----------------------------------------------------------------------===//
12835
12836static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12837 return Builder.CreateIntrinsic(Id, {});
12838}
12839
12840Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12841 Value *Addr,
12842 AtomicOrdering Ord) const {
12843 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12844
12845 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12846 "Only 8/16/32/64-bit atomic loads supported");
12847 Intrinsic::ID IntID;
12848 switch (SZ) {
12849 default:
12850 llvm_unreachable("Unexpected PrimitiveSize");
12851 case 8:
12852 IntID = Intrinsic::ppc_lbarx;
12853 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12854 break;
12855 case 16:
12856 IntID = Intrinsic::ppc_lharx;
12857 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12858 break;
12859 case 32:
12860 IntID = Intrinsic::ppc_lwarx;
12861 break;
12862 case 64:
12863 IntID = Intrinsic::ppc_ldarx;
12864 break;
12865 }
12866 Value *Call =
12867 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12868
12869 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12870}
12871
12872// Perform a store-conditional operation to Addr. Return the status of the
12873// store. This should be 0 if the store succeeded, non-zero otherwise.
12874Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12875 Value *Val, Value *Addr,
12876 AtomicOrdering Ord) const {
12877 Type *Ty = Val->getType();
12878 unsigned SZ = Ty->getPrimitiveSizeInBits();
12879
12880 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12881 "Only 8/16/32/64-bit atomic loads supported");
12882 Intrinsic::ID IntID;
12883 switch (SZ) {
12884 default:
12885 llvm_unreachable("Unexpected PrimitiveSize");
12886 case 8:
12887 IntID = Intrinsic::ppc_stbcx;
12888 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12889 break;
12890 case 16:
12891 IntID = Intrinsic::ppc_sthcx;
12892 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12893 break;
12894 case 32:
12895 IntID = Intrinsic::ppc_stwcx;
12896 break;
12897 case 64:
12898 IntID = Intrinsic::ppc_stdcx;
12899 break;
12900 }
12901
12902 if (SZ == 8 || SZ == 16)
12903 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
12904
12905 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
12906 /*FMFSource=*/nullptr, "stcx");
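 // The ppc_st[bhwd]cx intrinsics return 1 when the conditional store succeeds;
 // XOR with 1 converts that to the contract described above (0 on success).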
12907 return Builder.CreateXor(Call, Builder.getInt32(1));
12908}
12909
12910// The mappings for emitLeading/TrailingFence are taken from
12911// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12912Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12913 Instruction *Inst,
12914 AtomicOrdering Ord) const {
12915 if (Ord == AtomicOrdering::SequentiallyConsistent)
12916 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12917 if (isReleaseOrStronger(Ord))
12918 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12919 return nullptr;
12920}
12921
12922Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12923 Instruction *Inst,
12924 AtomicOrdering Ord) const {
12925 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12926 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12927 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12928 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12929 if (isa<LoadInst>(Inst))
12930 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12931 {Inst});
12932 // FIXME: Can use isync for rmw operation.
12933 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12934 }
12935 return nullptr;
12936}
12937
12938MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
12939 MachineBasicBlock *BB,
12940 unsigned AtomicSize,
12941 unsigned BinOpcode,
12942 unsigned CmpOpcode,
12943 unsigned CmpPred) const {
12944 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12945 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12946
12947 auto LoadMnemonic = PPC::LDARX;
12948 auto StoreMnemonic = PPC::STDCX;
12949 switch (AtomicSize) {
12950 default:
12951 llvm_unreachable("Unexpected size of atomic entity");
12952 case 1:
12953 LoadMnemonic = PPC::LBARX;
12954 StoreMnemonic = PPC::STBCX;
12955 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12956 break;
12957 case 2:
12958 LoadMnemonic = PPC::LHARX;
12959 StoreMnemonic = PPC::STHCX;
12960 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12961 break;
12962 case 4:
12963 LoadMnemonic = PPC::LWARX;
12964 StoreMnemonic = PPC::STWCX;
12965 break;
12966 case 8:
12967 LoadMnemonic = PPC::LDARX;
12968 StoreMnemonic = PPC::STDCX;
12969 break;
12970 }
12971
12972 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12973 MachineFunction *F = BB->getParent();
12975
12976 Register dest = MI.getOperand(0).getReg();
12977 Register ptrA = MI.getOperand(1).getReg();
12978 Register ptrB = MI.getOperand(2).getReg();
12979 Register incr = MI.getOperand(3).getReg();
12980 DebugLoc dl = MI.getDebugLoc();
12981
12982 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12983 MachineBasicBlock *loop2MBB =
12984 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12985 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12986 F->insert(It, loopMBB);
12987 if (CmpOpcode)
12988 F->insert(It, loop2MBB);
12989 F->insert(It, exitMBB);
12990 exitMBB->splice(exitMBB->begin(), BB,
12991 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12993
12994 MachineRegisterInfo &RegInfo = F->getRegInfo();
12995 Register TmpReg = (!BinOpcode) ? incr :
12996 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12997 : &PPC::GPRCRegClass);
12998
12999 // thisMBB:
13000 // ...
13001 // fallthrough --> loopMBB
13002 BB->addSuccessor(loopMBB);
13003
13004 // loopMBB:
13005 // l[wd]arx dest, ptr
13006 // add r0, dest, incr
13007 // st[wd]cx. r0, ptr
13008 // bne- loopMBB
13009 // fallthrough --> exitMBB
13010
13011 // For max/min...
13012 // loopMBB:
13013 // l[wd]arx dest, ptr
13014 // cmpl?[wd] dest, incr
13015 // bgt exitMBB
13016 // loop2MBB:
13017 // st[wd]cx. dest, ptr
13018 // bne- loopMBB
13019 // fallthrough --> exitMBB
13020
13021 BB = loopMBB;
13022 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13023 .addReg(ptrA).addReg(ptrB);
13024 if (BinOpcode)
13025 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13026 if (CmpOpcode) {
13027 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13028 // Signed comparisons of byte or halfword values must be sign-extended.
13029 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13030 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13031 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13032 ExtReg).addReg(dest);
13033 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13034 } else
13035 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13036
13037 BuildMI(BB, dl, TII->get(PPC::BCC))
13038 .addImm(CmpPred)
13039 .addReg(CrReg)
13040 .addMBB(exitMBB);
13041 BB->addSuccessor(loop2MBB);
13042 BB->addSuccessor(exitMBB);
13043 BB = loop2MBB;
13044 }
13045 BuildMI(BB, dl, TII->get(StoreMnemonic))
13046 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13047 BuildMI(BB, dl, TII->get(PPC::BCC))
13048 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
13049 BB->addSuccessor(loopMBB);
13050 BB->addSuccessor(exitMBB);
13051
13052 // exitMBB:
13053 // ...
13054 BB = exitMBB;
13055 return BB;
13056}
13057
13058static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13059 switch(MI.getOpcode()) {
13060 default:
13061 return false;
13062 case PPC::COPY:
13063 return TII->isSignExtended(MI.getOperand(1).getReg(),
13064 &MI.getMF()->getRegInfo());
13065 case PPC::LHA:
13066 case PPC::LHA8:
13067 case PPC::LHAU:
13068 case PPC::LHAU8:
13069 case PPC::LHAUX:
13070 case PPC::LHAUX8:
13071 case PPC::LHAX:
13072 case PPC::LHAX8:
13073 case PPC::LWA:
13074 case PPC::LWAUX:
13075 case PPC::LWAX:
13076 case PPC::LWAX_32:
13077 case PPC::LWA_32:
13078 case PPC::PLHA:
13079 case PPC::PLHA8:
13080 case PPC::PLHA8pc:
13081 case PPC::PLHApc:
13082 case PPC::PLWA:
13083 case PPC::PLWA8:
13084 case PPC::PLWA8pc:
13085 case PPC::PLWApc:
13086 case PPC::EXTSB:
13087 case PPC::EXTSB8:
13088 case PPC::EXTSB8_32_64:
13089 case PPC::EXTSB8_rec:
13090 case PPC::EXTSB_rec:
13091 case PPC::EXTSH:
13092 case PPC::EXTSH8:
13093 case PPC::EXTSH8_32_64:
13094 case PPC::EXTSH8_rec:
13095 case PPC::EXTSH_rec:
13096 case PPC::EXTSW:
13097 case PPC::EXTSWSLI:
13098 case PPC::EXTSWSLI_32_64:
13099 case PPC::EXTSWSLI_32_64_rec:
13100 case PPC::EXTSWSLI_rec:
13101 case PPC::EXTSW_32:
13102 case PPC::EXTSW_32_64:
13103 case PPC::EXTSW_32_64_rec:
13104 case PPC::EXTSW_rec:
13105 case PPC::SRAW:
13106 case PPC::SRAWI:
13107 case PPC::SRAWI_rec:
13108 case PPC::SRAW_rec:
13109 return true;
13110 }
13111 return false;
13112}
13113
13114MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13115 MachineInstr &MI, MachineBasicBlock *BB,
13116 bool is8bit, // operation
13117 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13118 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13119 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13120
13121 // If this is a signed comparison and the value being compared is not known
13122 // to be sign extended, sign extend it here.
13123 DebugLoc dl = MI.getDebugLoc();
13124 MachineFunction *F = BB->getParent();
13125 MachineRegisterInfo &RegInfo = F->getRegInfo();
13126 Register incr = MI.getOperand(3).getReg();
13127 bool IsSignExtended =
13128 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13129
13130 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13131 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13132 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13133 .addReg(MI.getOperand(3).getReg());
13134 MI.getOperand(3).setReg(ValueReg);
13135 incr = ValueReg;
13136 }
13137 // If we support part-word atomic mnemonics, just use them
13138 if (Subtarget.hasPartwordAtomics())
13139 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13140 CmpPred);
13141
13142 // In 64 bit mode we have to use 64 bits for addresses, even though the
13143 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13144 // registers without caring whether they're 32 or 64, but here we're
13145 // doing actual arithmetic on the addresses.
13146 bool is64bit = Subtarget.isPPC64();
13147 bool isLittleEndian = Subtarget.isLittleEndian();
13148 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
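 // PPC::ZERO/ZERO8 stand for r0 used where it reads as the constant 0 (the RA
 // operand of an X-form load/store), so the effective address below is simply
 // the value in PtrReg.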
13149
13150 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13152
13153 Register dest = MI.getOperand(0).getReg();
13154 Register ptrA = MI.getOperand(1).getReg();
13155 Register ptrB = MI.getOperand(2).getReg();
13156
13157 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13158 MachineBasicBlock *loop2MBB =
13159 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13160 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13161 F->insert(It, loopMBB);
13162 if (CmpOpcode)
13163 F->insert(It, loop2MBB);
13164 F->insert(It, exitMBB);
13165 exitMBB->splice(exitMBB->begin(), BB,
13166 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13168
13169 const TargetRegisterClass *RC =
13170 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13171 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13172
13173 Register PtrReg = RegInfo.createVirtualRegister(RC);
13174 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13175 Register ShiftReg =
13176 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13177 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13178 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13179 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13180 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13181 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13182 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13183 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13184 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13185 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13186 Register Ptr1Reg;
13187 Register TmpReg =
13188 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13189
13190 // thisMBB:
13191 // ...
13192 // fallthrough --> loopMBB
13193 BB->addSuccessor(loopMBB);
13194
13195 // The 4-byte load must be aligned, while a char or short may be
13196 // anywhere in the word. Hence all this nasty bookkeeping code.
13197 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13198 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13199 // xori shift, shift1, 24 [16]
13200 // rlwinm ptr, ptr1, 0, 0, 29
13201 // slw incr2, incr, shift
13202 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13203 // slw mask, mask2, shift
13204 // loopMBB:
13205 // lwarx tmpDest, ptr
13206 // add tmp, tmpDest, incr2
13207 // andc tmp2, tmpDest, mask
13208 // and tmp3, tmp, mask
13209 // or tmp4, tmp3, tmp2
13210 // stwcx. tmp4, ptr
13211 // bne- loopMBB
13212 // fallthrough --> exitMBB
13213 // srw SrwDest, tmpDest, shift
13214 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13215 if (ptrA != ZeroReg) {
13216 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13217 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13218 .addReg(ptrA)
13219 .addReg(ptrB);
13220 } else {
13221 Ptr1Reg = ptrB;
13222 }
13223 // We need to use the 32-bit subregister here to avoid a register class
13224 // mismatch in 64-bit mode.
13225 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13226 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13227 .addImm(3)
13228 .addImm(27)
13229 .addImm(is8bit ? 28 : 27);
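 // Shift1Reg now holds the byte offset of the subword within its aligned word
 // multiplied by 8 (for halfword operations only the halfword offset is kept);
 // on big-endian targets it is XORed below with 24 or 16 to turn it into a
 // shift amount counted from the least-significant end.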
13230 if (!isLittleEndian)
13231 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13232 .addReg(Shift1Reg)
13233 .addImm(is8bit ? 24 : 16);
13234 if (is64bit)
13235 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13236 .addReg(Ptr1Reg)
13237 .addImm(0)
13238 .addImm(61);
13239 else
13240 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13241 .addReg(Ptr1Reg)
13242 .addImm(0)
13243 .addImm(0)
13244 .addImm(29);
13245 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13246 if (is8bit)
13247 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13248 else {
13249 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13250 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13251 .addReg(Mask3Reg)
13252 .addImm(65535);
13253 }
13254 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13255 .addReg(Mask2Reg)
13256 .addReg(ShiftReg);
13257
13258 BB = loopMBB;
13259 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13260 .addReg(ZeroReg)
13261 .addReg(PtrReg);
13262 if (BinOpcode)
13263 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13264 .addReg(Incr2Reg)
13265 .addReg(TmpDestReg);
13266 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13267 .addReg(TmpDestReg)
13268 .addReg(MaskReg);
13269 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13270 if (CmpOpcode) {
13271 // For unsigned comparisons, we can directly compare the shifted values.
13272 // For signed comparisons we shift and sign extend.
13273 Register SReg = RegInfo.createVirtualRegister(GPRC);
13274 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13275 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13276 .addReg(TmpDestReg)
13277 .addReg(MaskReg);
13278 unsigned ValueReg = SReg;
13279 unsigned CmpReg = Incr2Reg;
13280 if (CmpOpcode == PPC::CMPW) {
13281 ValueReg = RegInfo.createVirtualRegister(GPRC);
13282 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13283 .addReg(SReg)
13284 .addReg(ShiftReg);
13285 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13286 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13287 .addReg(ValueReg);
13288 ValueReg = ValueSReg;
13289 CmpReg = incr;
13290 }
13291 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13292 BuildMI(BB, dl, TII->get(PPC::BCC))
13293 .addImm(CmpPred)
13294 .addReg(CrReg)
13295 .addMBB(exitMBB);
13296 BB->addSuccessor(loop2MBB);
13297 BB->addSuccessor(exitMBB);
13298 BB = loop2MBB;
13299 }
13300 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13301 BuildMI(BB, dl, TII->get(PPC::STWCX))
13302 .addReg(Tmp4Reg)
13303 .addReg(ZeroReg)
13304 .addReg(PtrReg);
13305 BuildMI(BB, dl, TII->get(PPC::BCC))
13307 .addReg(PPC::CR0)
13308 .addMBB(loopMBB);
13309 BB->addSuccessor(loopMBB);
13310 BB->addSuccessor(exitMBB);
13311
13312 // exitMBB:
13313 // ...
13314 BB = exitMBB;
13315 // Since the shift amount is not a constant, we need to clear
13316 // the upper bits with a separate RLWINM.
13317 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13318 .addReg(SrwDestReg)
13319 .addImm(0)
13320 .addImm(is8bit ? 24 : 16)
13321 .addImm(31);
13322 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13323 .addReg(TmpDestReg)
13324 .addReg(ShiftReg);
13325 return BB;
13326}
13327
13328MachineBasicBlock *
13329PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13330 MachineBasicBlock *MBB) const {
13331 DebugLoc DL = MI.getDebugLoc();
13332 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13333 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13334
13335 MachineFunction *MF = MBB->getParent();
13337
13338 const BasicBlock *BB = MBB->getBasicBlock();
13340
13341 Register DstReg = MI.getOperand(0).getReg();
13342 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13343 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13344 Register mainDstReg = MRI.createVirtualRegister(RC);
13345 Register restoreDstReg = MRI.createVirtualRegister(RC);
13346
13347 MVT PVT = getPointerTy(MF->getDataLayout());
13348 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13349 "Invalid Pointer Size!");
13350 // For v = setjmp(buf), we generate
13351 //
13352 // thisMBB:
13353 // SjLjSetup mainMBB
13354 // bl mainMBB
13355 // v_restore = 1
13356 // b sinkMBB
13357 //
13358 // mainMBB:
13359 // buf[LabelOffset] = LR
13360 // v_main = 0
13361 //
13362 // sinkMBB:
13363 // v = phi(main, restore)
13364 //
13365
13366 MachineBasicBlock *thisMBB = MBB;
13367 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13368 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13369 MF->insert(I, mainMBB);
13370 MF->insert(I, sinkMBB);
13371
13373
13374 // Transfer the remainder of BB and its successor edges to sinkMBB.
13375 sinkMBB->splice(sinkMBB->begin(), MBB,
13376 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13378
13379 // Note that the structure of the jmp_buf used here is not compatible
13380 // with that used by libc, and is not designed to be. Specifically, it
13381 // stores only those 'reserved' registers that LLVM does not otherwise
13382 // understand how to spill. Also, by convention, by the time this
13383 // intrinsic is called, Clang has already stored the frame address in the
13384 // first slot of the buffer and stack address in the third. Following the
13385 // X86 target code, we'll store the jump address in the second slot. We also
13386 // need to save the TOC pointer (R2) to handle jumps between shared
13387 // libraries, and that will be stored in the fourth slot. The thread
13388 // identifier (R13) is not affected.
13389
13390 // thisMBB:
13391 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13392 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13393 const int64_t BPOffset = 4 * PVT.getStoreSize();
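 // In pointer-sized slots, the buffer layout used here is: 0 = frame address
 // (stored by the front end), 1 = resume IP, 2 = stack pointer, 3 = TOC (R2),
 // 4 = base pointer.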
13394
13395 // Prepare the IP (resume address) in a register.
13396 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13397 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13398 Register BufReg = MI.getOperand(1).getReg();
13399
13400 if (Subtarget.is64BitELFABI()) {
13402 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13403 .addReg(PPC::X2)
13404 .addImm(TOCOffset)
13405 .addReg(BufReg)
13406 .cloneMemRefs(MI);
13407 }
13408
13409 // Naked functions never have a base pointer, and so we use r1. For all
13410 // other functions, this decision must be deferred until PEI.
13411 unsigned BaseReg;
13412 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13413 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13414 else
13415 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13416
13417 MIB = BuildMI(*thisMBB, MI, DL,
13418 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13419 .addReg(BaseReg)
13420 .addImm(BPOffset)
13421 .addReg(BufReg)
13422 .cloneMemRefs(MI);
13423
13424 // Setup
13425 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13426 MIB.addRegMask(TRI->getNoPreservedMask());
13427
13428 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13429
13430 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13431 .addMBB(mainMBB);
13432 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13433
13434 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13435 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13436
13437 // mainMBB:
13438 // mainDstReg = 0
13439 MIB =
13440 BuildMI(mainMBB, DL,
13441 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13442
13443 // Store IP
13444 if (Subtarget.isPPC64()) {
13445 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13446 .addReg(LabelReg)
13447 .addImm(LabelOffset)
13448 .addReg(BufReg);
13449 } else {
13450 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13451 .addReg(LabelReg)
13452 .addImm(LabelOffset)
13453 .addReg(BufReg);
13454 }
13455 MIB.cloneMemRefs(MI);
13456
13457 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13458 mainMBB->addSuccessor(sinkMBB);
13459
13460 // sinkMBB:
13461 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13462 TII->get(PPC::PHI), DstReg)
13463 .addReg(mainDstReg).addMBB(mainMBB)
13464 .addReg(restoreDstReg).addMBB(thisMBB);
13465
13466 MI.eraseFromParent();
13467 return sinkMBB;
13468}
13469
13470MachineBasicBlock *
13471PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13472 MachineBasicBlock *MBB) const {
13473 DebugLoc DL = MI.getDebugLoc();
13474 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13475
13476 MachineFunction *MF = MBB->getParent();
13478
13479 MVT PVT = getPointerTy(MF->getDataLayout());
13480 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13481 "Invalid Pointer Size!");
13482
13483 const TargetRegisterClass *RC =
13484 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13485 Register Tmp = MRI.createVirtualRegister(RC);
13486 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13487 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13488 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13489 unsigned BP =
13490 (PVT == MVT::i64)
13491 ? PPC::X30
13492 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13493 : PPC::R30);
13494
13496
13497 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13498 const int64_t SPOffset = 2 * PVT.getStoreSize();
13499 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13500 const int64_t BPOffset = 4 * PVT.getStoreSize();
13501
13502 Register BufReg = MI.getOperand(0).getReg();
13503
13504 // Reload FP (the jumped-to function may not have had a
13505 // frame pointer, and if so, then its r31 will be restored
13506 // as necessary).
13507 if (PVT == MVT::i64) {
13508 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13509 .addImm(0)
13510 .addReg(BufReg);
13511 } else {
13512 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13513 .addImm(0)
13514 .addReg(BufReg);
13515 }
13516 MIB.cloneMemRefs(MI);
13517
13518 // Reload IP
13519 if (PVT == MVT::i64) {
13520 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13521 .addImm(LabelOffset)
13522 .addReg(BufReg);
13523 } else {
13524 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13525 .addImm(LabelOffset)
13526 .addReg(BufReg);
13527 }
13528 MIB.cloneMemRefs(MI);
13529
13530 // Reload SP
13531 if (PVT == MVT::i64) {
13532 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13533 .addImm(SPOffset)
13534 .addReg(BufReg);
13535 } else {
13536 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13537 .addImm(SPOffset)
13538 .addReg(BufReg);
13539 }
13540 MIB.cloneMemRefs(MI);
13541
13542 // Reload BP
13543 if (PVT == MVT::i64) {
13544 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13545 .addImm(BPOffset)
13546 .addReg(BufReg);
13547 } else {
13548 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13549 .addImm(BPOffset)
13550 .addReg(BufReg);
13551 }
13552 MIB.cloneMemRefs(MI);
13553
13554 // Reload TOC
13555 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13557 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13558 .addImm(TOCOffset)
13559 .addReg(BufReg)
13560 .cloneMemRefs(MI);
13561 }
13562
13563 // Jump
13564 BuildMI(*MBB, MI, DL,
13565 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13566 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13567
13568 MI.eraseFromParent();
13569 return MBB;
13570}
13571
13572bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13573 // If the function specifically requests inline stack probes, emit them.
13574 if (MF.getFunction().hasFnAttribute("probe-stack"))
13575 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13576 "inline-asm";
13577 return false;
13578}
13579
13580unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13581 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13582 unsigned StackAlign = TFI->getStackAlignment();
13583 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13584 "Unexpected stack alignment");
13585 // The default stack probe size is 4096 if the function has no
13586 // stack-probe-size attribute.
13587 const Function &Fn = MF.getFunction();
13588 unsigned StackProbeSize =
13589 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13590 // Round down to the stack alignment.
13591 StackProbeSize &= ~(StackAlign - 1);
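 // For example, with a 16-byte stack alignment a requested probe size of 4100
 // is rounded down to 4096.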
13592 return StackProbeSize ? StackProbeSize : StackAlign;
13593}
13594
13595// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13596// into three phases. In the first phase, it uses the pseudo instruction
13597// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
13598// and FinalStackPtr. In the second phase, it generates a loop that probes the
13599// allocation block by block. Finally, it uses the pseudo instruction DYNAREAOFFSET
13600// to get the future result of MaxCallFrameSize, from which it calculates the correct data area pointer.
13601MachineBasicBlock *
13602PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13603 MachineBasicBlock *MBB) const {
13604 const bool isPPC64 = Subtarget.isPPC64();
13605 MachineFunction *MF = MBB->getParent();
13606 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13607 DebugLoc DL = MI.getDebugLoc();
13608 const unsigned ProbeSize = getStackProbeSize(*MF);
13609 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13610 MachineRegisterInfo &MRI = MF->getRegInfo();
13611 // The CFG for probing the stack looks like this:
13612 // +-----+
13613 // | MBB |
13614 // +--+--+
13615 // |
13616 // +----v----+
13617 // +--->+ TestMBB +---+
13618 // | +----+----+ |
13619 // | | |
13620 // | +-----v----+ |
13621 // +---+ BlockMBB | |
13622 // +----------+ |
13623 // |
13624 // +---------+ |
13625 // | TailMBB +<--+
13626 // +---------+
13627 // In MBB, calculate previous frame pointer and final stack pointer.
13628 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13629 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13630 // TailMBB is spliced via \p MI.
13631 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13632 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13633 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13634
13636 MF->insert(MBBIter, TestMBB);
13637 MF->insert(MBBIter, BlockMBB);
13638 MF->insert(MBBIter, TailMBB);
13639
13640 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13641 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13642
13643 Register DstReg = MI.getOperand(0).getReg();
13644 Register NegSizeReg = MI.getOperand(1).getReg();
13645 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13646 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13647 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13648 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13649
13650 // Since the value of NegSizeReg might be realigned during prolog/epilog
13651 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13652 // actual FramePointer and NegSize.
13653 unsigned ProbeOpc;
13654 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13655 ProbeOpc =
13656 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13657 else
13658 // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg and
13659 // NegSizeReg will be allocated to the same physical register, avoiding a
13660 // redundant copy when NegSizeReg has only one use, namely the current MI,
13661 // which will then be replaced by PREPARE_PROBED_ALLOCA.
13662 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13663 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13664 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13665 .addDef(ActualNegSizeReg)
13666 .addReg(NegSizeReg)
13667 .add(MI.getOperand(2))
13668 .add(MI.getOperand(3));
13669
13670 // Calculate the final stack pointer, which equals SP + ActualNegSize.
13671 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13672 FinalStackPtr)
13673 .addReg(SPReg)
13674 .addReg(ActualNegSizeReg);
13675
13676 // Materialize a scratch register for update.
13677 int64_t NegProbeSize = -(int64_t)ProbeSize;
13678 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13679 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13680 if (!isInt<16>(NegProbeSize)) {
13681 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13682 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13683 .addImm(NegProbeSize >> 16);
13684 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13685 ScratchReg)
13686 .addReg(TempReg)
13687 .addImm(NegProbeSize & 0xFFFF);
13688 } else
13689 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13690 .addImm(NegProbeSize);
13691
13692 {
13693 // Probing leading residual part.
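    // NegMod = ActualNegSize - (ActualNegSize / NegProbeSize) * NegProbeSize,
    // i.e. the part of the allocation that is not a multiple of ProbeSize; it
    // is probed here with a single store-with-update before entering the loop.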
13694 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13695 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13696 .addReg(ActualNegSizeReg)
13697 .addReg(ScratchReg);
13698 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13699 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13700 .addReg(Div)
13701 .addReg(ScratchReg);
13702 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13703 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13704 .addReg(Mul)
13705 .addReg(ActualNegSizeReg);
13706 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13707 .addReg(FramePointer)
13708 .addReg(SPReg)
13709 .addReg(NegMod);
13710 }
13711
13712 {
13713 // Remaining part should be multiple of ProbeSize.
13714 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13715 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13716 .addReg(SPReg)
13717 .addReg(FinalStackPtr);
13718 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13720 .addReg(CmpResult)
13721 .addMBB(TailMBB);
13722 TestMBB->addSuccessor(BlockMBB);
13723 TestMBB->addSuccessor(TailMBB);
13724 }
13725
13726 {
13727 // Touch the block.
13728 // |P...|P...|P...
13729 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13730 .addReg(FramePointer)
13731 .addReg(SPReg)
13732 .addReg(ScratchReg);
13733 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13734 BlockMBB->addSuccessor(TestMBB);
13735 }
13736
13737 // The calculation of MaxCallFrameSize is deferred to prolog/epilog
13738 // insertion, so use the DYNAREAOFFSET pseudo instruction to get the future result.
13739 Register MaxCallFrameSizeReg =
13740 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13741 BuildMI(TailMBB, DL,
13742 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13743 MaxCallFrameSizeReg)
13744 .add(MI.getOperand(2))
13745 .add(MI.getOperand(3));
13746 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13747 .addReg(SPReg)
13748 .addReg(MaxCallFrameSizeReg);
13749
13750 // Splice instructions after MI to TailMBB.
13751 TailMBB->splice(TailMBB->end(), MBB,
13752 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13754 MBB->addSuccessor(TestMBB);
13755
13756 // Delete the pseudo instruction.
13757 MI.eraseFromParent();
13758
13759 ++NumDynamicAllocaProbed;
13760 return TailMBB;
13761}
13762
13763static bool IsSelectCC(MachineInstr &MI) {
13764 switch (MI.getOpcode()) {
13765 case PPC::SELECT_CC_I4:
13766 case PPC::SELECT_CC_I8:
13767 case PPC::SELECT_CC_F4:
13768 case PPC::SELECT_CC_F8:
13769 case PPC::SELECT_CC_F16:
13770 case PPC::SELECT_CC_VRRC:
13771 case PPC::SELECT_CC_VSFRC:
13772 case PPC::SELECT_CC_VSSRC:
13773 case PPC::SELECT_CC_VSRC:
13774 case PPC::SELECT_CC_SPE4:
13775 case PPC::SELECT_CC_SPE:
13776 return true;
13777 default:
13778 return false;
13779 }
13780}
13781
13782static bool IsSelect(MachineInstr &MI) {
13783 switch (MI.getOpcode()) {
13784 case PPC::SELECT_I4:
13785 case PPC::SELECT_I8:
13786 case PPC::SELECT_F4:
13787 case PPC::SELECT_F8:
13788 case PPC::SELECT_F16:
13789 case PPC::SELECT_SPE:
13790 case PPC::SELECT_SPE4:
13791 case PPC::SELECT_VRRC:
13792 case PPC::SELECT_VSFRC:
13793 case PPC::SELECT_VSSRC:
13794 case PPC::SELECT_VSRC:
13795 return true;
13796 default:
13797 return false;
13798 }
13799}
13800
13801MachineBasicBlock *
13802PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13803 MachineBasicBlock *BB) const {
13804 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13805 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13806 if (Subtarget.is64BitELFABI() &&
13807 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13808 !Subtarget.isUsingPCRelativeCalls()) {
13809 // Call lowering should have added an r2 operand to indicate a dependence
13810 // on the TOC base pointer value. It can't however, because there is no
13811 // way to mark the dependence as implicit there, and so the stackmap code
13812 // will confuse it with a regular operand. Instead, add the dependence
13813 // here.
13814 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13815 }
13816
13817 return emitPatchPoint(MI, BB);
13818 }
13819
13820 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13821 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13822 return emitEHSjLjSetJmp(MI, BB);
13823 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13824 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13825 return emitEHSjLjLongJmp(MI, BB);
13826 }
13827
13828 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13829
13830 // To "insert" these instructions we actually have to insert their
13831 // control-flow patterns.
13832 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13834
13835 MachineFunction *F = BB->getParent();
13836 MachineRegisterInfo &MRI = F->getRegInfo();
13837
13838 if (Subtarget.hasISEL() &&
13839 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13840 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13841 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13843 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13844 MI.getOpcode() == PPC::SELECT_CC_I8)
13845 Cond.push_back(MI.getOperand(4));
13846 else
13848 Cond.push_back(MI.getOperand(1));
13849
13850 DebugLoc dl = MI.getDebugLoc();
13851 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13852 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13853 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13854 // The incoming instruction knows the destination vreg to set, the
13855 // condition code register to branch on, the true/false values to
13856 // select between, and a branch opcode to use.
13857
13858 // thisMBB:
13859 // ...
13860 // TrueVal = ...
13861 // cmpTY ccX, r1, r2
13862 // bCC sinkMBB
13863 // fallthrough --> copy0MBB
13864 MachineBasicBlock *thisMBB = BB;
13865 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13866 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13867 DebugLoc dl = MI.getDebugLoc();
13868 F->insert(It, copy0MBB);
13869 F->insert(It, sinkMBB);
13870
13871 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13872 copy0MBB->addLiveIn(PPC::CARRY);
13873 sinkMBB->addLiveIn(PPC::CARRY);
13874 }
13875
13876 // Set the call frame size on entry to the new basic blocks.
13877 // See https://reviews.llvm.org/D156113.
13878 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13879 copy0MBB->setCallFrameSize(CallFrameSize);
13880 sinkMBB->setCallFrameSize(CallFrameSize);
13881
13882 // Transfer the remainder of BB and its successor edges to sinkMBB.
13883 sinkMBB->splice(sinkMBB->begin(), BB,
13884 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13886
13887 // Next, add the true and fallthrough blocks as its successors.
13888 BB->addSuccessor(copy0MBB);
13889 BB->addSuccessor(sinkMBB);
13890
13891 if (IsSelect(MI)) {
13892 BuildMI(BB, dl, TII->get(PPC::BC))
13893 .addReg(MI.getOperand(1).getReg())
13894 .addMBB(sinkMBB);
13895 } else {
13896 unsigned SelectPred = MI.getOperand(4).getImm();
13897 BuildMI(BB, dl, TII->get(PPC::BCC))
13898 .addImm(SelectPred)
13899 .addReg(MI.getOperand(1).getReg())
13900 .addMBB(sinkMBB);
13901 }
13902
13903 // copy0MBB:
13904 // %FalseValue = ...
13905 // # fallthrough to sinkMBB
13906 BB = copy0MBB;
13907
13908 // Update machine-CFG edges
13909 BB->addSuccessor(sinkMBB);
13910
13911 // sinkMBB:
13912 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13913 // ...
13914 BB = sinkMBB;
13915 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13916 .addReg(MI.getOperand(3).getReg())
13917 .addMBB(copy0MBB)
13918 .addReg(MI.getOperand(2).getReg())
13919 .addMBB(thisMBB);
13920 } else if (MI.getOpcode() == PPC::ReadTB) {
13921 // To read the 64-bit time-base register on a 32-bit target, we read the
13922 // two halves. Should the counter have wrapped while it was being read, we
13923 // need to try again.
13924 // ...
13925 // readLoop:
13926 // mfspr Rx,TBU # load from TBU
13927 // mfspr Ry,TB # load from TB
13928 // mfspr Rz,TBU # load from TBU
13929 // cmpw crX,Rx,Rz # check if 'old'='new'
13930 // bne readLoop # branch if they're not equal
13931 // ...
13932
13933 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13934 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13935 DebugLoc dl = MI.getDebugLoc();
13936 F->insert(It, readMBB);
13937 F->insert(It, sinkMBB);
13938
13939 // Transfer the remainder of BB and its successor edges to sinkMBB.
13940 sinkMBB->splice(sinkMBB->begin(), BB,
13941 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13943
13944 BB->addSuccessor(readMBB);
13945 BB = readMBB;
13946
13947 MachineRegisterInfo &RegInfo = F->getRegInfo();
13948 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13949 Register LoReg = MI.getOperand(0).getReg();
13950 Register HiReg = MI.getOperand(1).getReg();
13951
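 // SPR 269 is TBU (the upper 32 bits of the time base) and SPR 268 is TB
 // (the lower 32 bits).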
13952 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13953 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13954 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13955
13956 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13957
13958 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13959 .addReg(HiReg)
13960 .addReg(ReadAgainReg);
13961 BuildMI(BB, dl, TII->get(PPC::BCC))
13963 .addReg(CmpReg)
13964 .addMBB(readMBB);
13965
13966 BB->addSuccessor(readMBB);
13967 BB->addSuccessor(sinkMBB);
13968 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13969 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13970 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13971 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13972 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13973 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13974 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13975 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13976
13977 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13978 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13979 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13980 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13981 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13982 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13983 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13984 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13985
13986 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13987 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13988 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13989 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13990 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13991 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13992 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13993 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13994
13995 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13996 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13997 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13998 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13999 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14000 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14001 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14002 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14003
14004 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14005 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14006 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14007 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14008 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14009 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14010 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14011 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14012
14013 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14014 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14015 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14016 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14017 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14018 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14019 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14020 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14021
14022 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14023 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14024 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14025 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14026 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14027 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14028 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14029 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14030
14031 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14032 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14033 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14034 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14035 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14036 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14037 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14038 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14039
14040 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14041 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14042 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14043 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14044 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14045 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14046 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14047 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14048
14049 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14050 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14051 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14052 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14053 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14054 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14055 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14056 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14057
14058 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14059 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14060 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14061 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14062 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14063 BB = EmitAtomicBinary(MI, BB, 4, 0);
14064 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14065 BB = EmitAtomicBinary(MI, BB, 8, 0);
14066 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14067 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14068 (Subtarget.hasPartwordAtomics() &&
14069 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14070 (Subtarget.hasPartwordAtomics() &&
14071 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14072 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14073
14074 auto LoadMnemonic = PPC::LDARX;
14075 auto StoreMnemonic = PPC::STDCX;
14076 switch (MI.getOpcode()) {
14077 default:
14078 llvm_unreachable("Compare and swap of unknown size");
14079 case PPC::ATOMIC_CMP_SWAP_I8:
14080 LoadMnemonic = PPC::LBARX;
14081 StoreMnemonic = PPC::STBCX;
14082 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14083 break;
14084 case PPC::ATOMIC_CMP_SWAP_I16:
14085 LoadMnemonic = PPC::LHARX;
14086 StoreMnemonic = PPC::STHCX;
14087 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14088 break;
14089 case PPC::ATOMIC_CMP_SWAP_I32:
14090 LoadMnemonic = PPC::LWARX;
14091 StoreMnemonic = PPC::STWCX;
14092 break;
14093 case PPC::ATOMIC_CMP_SWAP_I64:
14094 LoadMnemonic = PPC::LDARX;
14095 StoreMnemonic = PPC::STDCX;
14096 break;
14097 }
14098 MachineRegisterInfo &RegInfo = F->getRegInfo();
14099 Register dest = MI.getOperand(0).getReg();
14100 Register ptrA = MI.getOperand(1).getReg();
14101 Register ptrB = MI.getOperand(2).getReg();
14102 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14103 Register oldval = MI.getOperand(3).getReg();
14104 Register newval = MI.getOperand(4).getReg();
14105 DebugLoc dl = MI.getDebugLoc();
14106
14107 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14108 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14109 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14110 F->insert(It, loop1MBB);
14111 F->insert(It, loop2MBB);
14112 F->insert(It, exitMBB);
14113 exitMBB->splice(exitMBB->begin(), BB,
14114 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14115 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14116
14117 // thisMBB:
14118 // ...
14119 // fallthrough --> loopMBB
14120 BB->addSuccessor(loop1MBB);
14121
14122 // loop1MBB:
14123 // l[bhwd]arx dest, ptr
14124 // cmp[wd] dest, oldval
14125 // bne- exitBB
14126 // loop2MBB:
14127 // st[bhwd]cx. newval, ptr
14128 // bne- loopMBB
14129 // b exitBB
14130 // exitBB:
14131 BB = loop1MBB;
14132 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14133 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14134 .addReg(dest)
14135 .addReg(oldval);
14136 BuildMI(BB, dl, TII->get(PPC::BCC))
14137 .addImm(PPC::PRED_NE)
14138 .addReg(CrReg)
14139 .addMBB(exitMBB);
14140 BB->addSuccessor(loop2MBB);
14141 BB->addSuccessor(exitMBB);
14142
14143 BB = loop2MBB;
14144 BuildMI(BB, dl, TII->get(StoreMnemonic))
14145 .addReg(newval)
14146 .addReg(ptrA)
14147 .addReg(ptrB);
14148 BuildMI(BB, dl, TII->get(PPC::BCC))
14149 .addImm(PPC::PRED_NE)
14150 .addReg(PPC::CR0)
14151 .addMBB(loop1MBB);
14152 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14153 BB->addSuccessor(loop1MBB);
14154 BB->addSuccessor(exitMBB);
14155
14156 // exitMBB:
14157 // ...
14158 BB = exitMBB;
14159 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14160 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14161 // We must use 64-bit registers for addresses when targeting 64-bit,
14162 // since we're actually doing arithmetic on them. Other registers
14163 // can be 32-bit.
14164 bool is64bit = Subtarget.isPPC64();
14165 bool isLittleEndian = Subtarget.isLittleEndian();
14166 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14167
14168 Register dest = MI.getOperand(0).getReg();
14169 Register ptrA = MI.getOperand(1).getReg();
14170 Register ptrB = MI.getOperand(2).getReg();
14171 Register oldval = MI.getOperand(3).getReg();
14172 Register newval = MI.getOperand(4).getReg();
14173 DebugLoc dl = MI.getDebugLoc();
14174
14175 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14176 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14177 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14178 F->insert(It, loop1MBB);
14179 F->insert(It, loop2MBB);
14180 F->insert(It, exitMBB);
14181 exitMBB->splice(exitMBB->begin(), BB,
14182 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14183 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14184
14185 MachineRegisterInfo &RegInfo = F->getRegInfo();
14186 const TargetRegisterClass *RC =
14187 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14188 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14189
14190 Register PtrReg = RegInfo.createVirtualRegister(RC);
14191 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14192 Register ShiftReg =
14193 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14194 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14195 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14196 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14197 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14198 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14199 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14200 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14201 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14202 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14203 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14204 Register Ptr1Reg;
14205 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14206 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14207 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14208 // thisMBB:
14209 // ...
14210 // fallthrough --> loopMBB
14211 BB->addSuccessor(loop1MBB);
14212
14213 // The 4-byte load must be aligned, while a char or short may be
14214 // anywhere in the word. Hence all this nasty bookkeeping code.
14215 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14216 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14217 // xori shift, shift1, 24 [16]
14218 // rlwinm ptr, ptr1, 0, 0, 29
14219 // slw newval2, newval, shift
14220 // slw oldval2, oldval, shift
14221 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14222 // slw mask, mask2, shift
14223 // and newval3, newval2, mask
14224 // and oldval3, oldval2, mask
14225 // loop1MBB:
14226 // lwarx tmpDest, ptr
14227 // and tmp, tmpDest, mask
14228 // cmpw tmp, oldval3
14229 // bne- exitBB
14230 // loop2MBB:
14231 // andc tmp2, tmpDest, mask
14232 // or tmp4, tmp2, newval3
14233 // stwcx. tmp4, ptr
14234 // bne- loop1MBB
14235 // b exitBB
14236 // exitBB:
14237 // srw dest, tmpDest, shift
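// In short: shift1 is 8 * (offset of the byte/halfword within its aligned
// word); on big-endian the xori (24 for bytes, 16 for halfwords) converts
// that into the lane's distance from the least significant bit. The value is
// compared and updated in place inside the masked lane, and the exit block
// shifts the loaded lane back down into the low bits of dest.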
14238 if (ptrA != ZeroReg) {
14239 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14240 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14241 .addReg(ptrA)
14242 .addReg(ptrB);
14243 } else {
14244 Ptr1Reg = ptrB;
14245 }
14246
14247 // We need to use a 32-bit subregister here to avoid a register class
14248 // mismatch in 64-bit mode.
14249 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14250 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
14251 .addImm(3)
14252 .addImm(27)
14253 .addImm(is8bit ? 28 : 27);
14254 if (!isLittleEndian)
14255 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14256 .addReg(Shift1Reg)
14257 .addImm(is8bit ? 24 : 16);
14258 if (is64bit)
14259 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14260 .addReg(Ptr1Reg)
14261 .addImm(0)
14262 .addImm(61);
14263 else
14264 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14265 .addReg(Ptr1Reg)
14266 .addImm(0)
14267 .addImm(0)
14268 .addImm(29);
14269 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14270 .addReg(newval)
14271 .addReg(ShiftReg);
14272 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14273 .addReg(oldval)
14274 .addReg(ShiftReg);
14275 if (is8bit)
14276 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14277 else {
14278 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14279 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14280 .addReg(Mask3Reg)
14281 .addImm(65535);
14282 }
14283 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14284 .addReg(Mask2Reg)
14285 .addReg(ShiftReg);
14286 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14287 .addReg(NewVal2Reg)
14288 .addReg(MaskReg);
14289 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14290 .addReg(OldVal2Reg)
14291 .addReg(MaskReg);
14292
14293 BB = loop1MBB;
14294 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14295 .addReg(ZeroReg)
14296 .addReg(PtrReg);
14297 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14298 .addReg(TmpDestReg)
14299 .addReg(MaskReg);
14300 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14301 .addReg(TmpReg)
14302 .addReg(OldVal3Reg);
14303 BuildMI(BB, dl, TII->get(PPC::BCC))
14304 .addImm(PPC::PRED_NE)
14305 .addReg(CrReg)
14306 .addMBB(exitMBB);
14307 BB->addSuccessor(loop2MBB);
14308 BB->addSuccessor(exitMBB);
14309
14310 BB = loop2MBB;
14311 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14312 .addReg(TmpDestReg)
14313 .addReg(MaskReg);
14314 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14315 .addReg(Tmp2Reg)
14316 .addReg(NewVal3Reg);
14317 BuildMI(BB, dl, TII->get(PPC::STWCX))
14318 .addReg(Tmp4Reg)
14319 .addReg(ZeroReg)
14320 .addReg(PtrReg);
14321 BuildMI(BB, dl, TII->get(PPC::BCC))
14322 .addImm(PPC::PRED_NE)
14323 .addReg(PPC::CR0)
14324 .addMBB(loop1MBB);
14325 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14326 BB->addSuccessor(loop1MBB);
14327 BB->addSuccessor(exitMBB);
14328
14329 // exitMBB:
14330 // ...
14331 BB = exitMBB;
14332 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14333 .addReg(TmpReg)
14334 .addReg(ShiftReg);
14335 } else if (MI.getOpcode() == PPC::FADDrtz) {
14336 // This pseudo performs an FADD with rounding mode temporarily forced
14337 // to round-to-zero. We emit this via custom inserter since the FPSCR
14338 // is not modeled at the SelectionDAG level.
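// The mtfsb1 31 / mtfsb0 30 pair below sets the RN (rounding control) field
// to 0b01, i.e. round toward zero; the trailing mtfsf writes the saved value
// back so the original rounding mode is restored after the add.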
14339 Register Dest = MI.getOperand(0).getReg();
14340 Register Src1 = MI.getOperand(1).getReg();
14341 Register Src2 = MI.getOperand(2).getReg();
14342 DebugLoc dl = MI.getDebugLoc();
14343
14344 MachineRegisterInfo &RegInfo = F->getRegInfo();
14345 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14346
14347 // Save FPSCR value.
14348 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14349
14350 // Set rounding mode to round-to-zero.
14351 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14352 .addImm(31)
14353 .addReg(PPC::RM, RegState::ImplicitDefine);
14354
14355 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14356 .addImm(30)
14357 .addReg(PPC::RM, RegState::ImplicitDefine);
14358
14359 // Perform addition.
14360 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14361 .addReg(Src1)
14362 .addReg(Src2);
14363 if (MI.getFlag(MachineInstr::NoFPExcept))
14364 MIB.setFlag(MachineInstr::NoFPExcept);
14365
14366 // Restore FPSCR value.
14367 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14368 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14369 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14370 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14371 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14372 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14373 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14374 ? PPC::ANDI8_rec
14375 : PPC::ANDI_rec;
14376 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14377 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14378
14379 MachineRegisterInfo &RegInfo = F->getRegInfo();
14380 Register Dest = RegInfo.createVirtualRegister(
14381 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14382
14383 DebugLoc Dl = MI.getDebugLoc();
14384 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14385 .addReg(MI.getOperand(1).getReg())
14386 .addImm(1);
14387 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14388 MI.getOperand(0).getReg())
14389 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14390 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14391 DebugLoc Dl = MI.getDebugLoc();
14392 MachineRegisterInfo &RegInfo = F->getRegInfo();
14393 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14394 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14395 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14396 MI.getOperand(0).getReg())
14397 .addReg(CRReg);
14398 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14399 DebugLoc Dl = MI.getDebugLoc();
14400 unsigned Imm = MI.getOperand(1).getImm();
14401 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14402 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14403 MI.getOperand(0).getReg())
14404 .addReg(PPC::CR0EQ);
14405 } else if (MI.getOpcode() == PPC::SETRNDi) {
14406 DebugLoc dl = MI.getDebugLoc();
14407 Register OldFPSCRReg = MI.getOperand(0).getReg();
14408
14409 // Save FPSCR value.
14410 if (MRI.use_empty(OldFPSCRReg))
14411 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14412 else
14413 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14414
14415 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
14416 // the following settings:
14417 // 00 Round to nearest
14418 // 01 Round to 0
14419 // 10 Round to +inf
14420 // 11 Round to -inf
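// For example, an immediate of 2 selects mtfsb0 31 and mtfsb1 30 below,
// leaving RN = 0b10 (round to +inf).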
14421
14422 // When the operand is an immediate, use its two least significant bits to
14423 // set bits 62:63 of the FPSCR.
14424 unsigned Mode = MI.getOperand(1).getImm();
14425 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14426 .addImm(31)
14427 .addReg(PPC::RM, RegState::ImplicitDefine);
14428
14429 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14430 .addImm(30)
14431 .addReg(PPC::RM, RegState::ImplicitDefine);
14432 } else if (MI.getOpcode() == PPC::SETRND) {
14433 DebugLoc dl = MI.getDebugLoc();
14434
14435 // Copy a register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg, or
14436 // from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14437 // If the target doesn't have DirectMove, we have to go through the stack
14438 // for the conversion, because the target lacks instructions such as mtvsrd
14439 // and mfvsrd that would do the conversion directly.
14440 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14441 if (Subtarget.hasDirectMove()) {
14442 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14443 .addReg(SrcReg);
14444 } else {
14445 // Use stack to do the register copy.
14446 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14447 MachineRegisterInfo &RegInfo = F->getRegInfo();
14448 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14449 if (RC == &PPC::F8RCRegClass) {
14450 // Copy register from F8RCRegClass to G8RCRegClass.
14451 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14452 "Unsupported RegClass.");
14453
14454 StoreOp = PPC::STFD;
14455 LoadOp = PPC::LD;
14456 } else {
14457 // Copy register from G8RCRegClass to F8RCRegClass.
14458 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14459 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14460 "Unsupported RegClass.");
14461 }
14462
14463 MachineFrameInfo &MFI = F->getFrameInfo();
14464 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14465
14466 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14467 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14468 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14469 MFI.getObjectAlign(FrameIdx));
14470
14471 // Store the SrcReg into the stack.
14472 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14473 .addReg(SrcReg)
14474 .addImm(0)
14475 .addFrameIndex(FrameIdx)
14476 .addMemOperand(MMOStore);
14477
14478 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14479 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14480 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14481 MFI.getObjectAlign(FrameIdx));
14482
14483 // Load from the stack slot where SrcReg was stored and place the value in
14484 // DestReg, completing the register-class conversion from SrcReg's class to
14485 // DestReg's class.
14486 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14487 .addImm(0)
14488 .addFrameIndex(FrameIdx)
14489 .addMemOperand(MMOLoad);
14490 }
14491 };
14492
14493 Register OldFPSCRReg = MI.getOperand(0).getReg();
14494
14495 // Save FPSCR value.
14496 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14497
14498 // When the operand is a gprc register, use its two least significant bits
14499 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
14500 //
14501 // copy OldFPSCRTmpReg, OldFPSCRReg
14502 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14503 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14504 // copy NewFPSCRReg, NewFPSCRTmpReg
14505 // mtfsf 255, NewFPSCRReg
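// Note that rldimi with SH=0 and MB=62 inserts only bits 62:63 of ExtSrcReg
// (the requested rounding mode) into the copy of the old FPSCR contents,
// leaving all other bits untouched before mtfsf writes the result back.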
14506 MachineOperand SrcOp = MI.getOperand(1);
14507 MachineRegisterInfo &RegInfo = F->getRegInfo();
14508 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14509
14510 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14511
14512 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14513 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14514
14515 // The first operand of INSERT_SUBREG should be a register that has
14516 // subregisters; we only care about its RegClass, so an IMPLICIT_DEF
14517 // register suffices.
14518 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14519 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14520 .addReg(ImDefReg)
14521 .add(SrcOp)
14522 .addImm(1);
14523
14524 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14525 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14526 .addReg(OldFPSCRTmpReg)
14527 .addReg(ExtSrcReg)
14528 .addImm(0)
14529 .addImm(62);
14530
14531 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14532 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14533
14534 // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
14535 // 32:63 of the FPSCR.
14536 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14537 .addImm(255)
14538 .addReg(NewFPSCRReg)
14539 .addImm(0)
14540 .addImm(0);
14541 } else if (MI.getOpcode() == PPC::SETFLM) {
14542 DebugLoc Dl = MI.getDebugLoc();
14543
14544 // Result of setflm is previous FPSCR content, so we need to save it first.
14545 Register OldFPSCRReg = MI.getOperand(0).getReg();
14546 if (MRI.use_empty(OldFPSCRReg))
14547 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14548 else
14549 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14550
14551 // Put bits 32:63 of NewFPSCRReg into the FPSCR.
14552 Register NewFPSCRReg = MI.getOperand(1).getReg();
14553 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14554 .addImm(255)
14555 .addReg(NewFPSCRReg)
14556 .addImm(0)
14557 .addImm(0);
14558 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14559 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14560 return emitProbedAlloca(MI, BB);
14561 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14562 DebugLoc DL = MI.getDebugLoc();
14563 Register Src = MI.getOperand(2).getReg();
14564 Register Lo = MI.getOperand(0).getReg();
14565 Register Hi = MI.getOperand(1).getReg();
14566 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14567 .addDef(Lo)
14568 .addUse(Src, 0, PPC::sub_gp8_x1);
14569 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14570 .addDef(Hi)
14571 .addUse(Src, 0, PPC::sub_gp8_x0);
14572 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14573 MI.getOpcode() == PPC::STQX_PSEUDO) {
14574 DebugLoc DL = MI.getDebugLoc();
14575 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
14576 // holds the result of adding RA and RB, so it has to be in
14577 // g8rc_and_g8rc_nox0.
14578 Register Ptr =
14579 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14580 Register Val = MI.getOperand(0).getReg();
14581 Register RA = MI.getOperand(1).getReg();
14582 Register RB = MI.getOperand(2).getReg();
14583 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14584 BuildMI(*BB, MI, DL,
14585 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14586 : TII->get(PPC::STQ))
14587 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14588 .addImm(0)
14589 .addReg(Ptr);
14590 } else {
14591 llvm_unreachable("Unexpected instr type to insert");
14592 }
14593
14594 MI.eraseFromParent(); // The pseudo instruction is gone now.
14595 return BB;
14596}
14597
14598//===----------------------------------------------------------------------===//
14599// Target Optimization Hooks
14600//===----------------------------------------------------------------------===//
14601
14602static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14603 // For the estimates, convergence is quadratic, so we essentially double the
14604 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14605 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14606 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
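// For example, starting from a 2^-14 estimate, one iteration gives roughly
// 2^-28 accuracy (enough for f32) and two give roughly 2^-56 (enough for
// f64); starting from 2^-5, three and four iterations are needed instead.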
14607 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14608 if (VT.getScalarType() == MVT::f64)
14609 RefinementSteps++;
14610 return RefinementSteps;
14611}
14612
14613SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14614 const DenormalMode &Mode) const {
14615 // We only have VSX Vector Test for software Square Root.
14616 EVT VT = Op.getValueType();
14617 if (!isTypeLegal(MVT::i1) ||
14618 (VT != MVT::f64 &&
14619 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14620 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14621
14622 SDLoc DL(Op);
14623 // The output register of FTSQRT is a CR field.
14624 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14625 // ftsqrt BF,FRB
14626 // Let e_b be the unbiased exponent of the double-precision
14627 // floating-point operand in register FRB.
14628 // fe_flag is set to 1 if either of the following conditions occurs.
14629 // - The double-precision floating-point operand in register FRB is a zero,
14630 // a NaN, an infinity, or a negative value.
14631 // - e_b is less than or equal to -970.
14632 // Otherwise fe_flag is set to 0.
14633 // Both the VSX and non-VSX versions set the EQ bit in the CR if the number
14634 // is not eligible for iteration (zero, negative, infinity, NaN, or an
14635 // unbiased exponent less than -970).
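// The EQ bit is extracted below as an i1 so that the square-root expansion
// can select the special-input result (see getSqrtResultForDenormInput)
// instead of the refined estimate for these values.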
14636 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14637 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14638 FTSQRT, SRIdxVal),
14639 0);
14640}
14641
14642SDValue
14643PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14644 SelectionDAG &DAG) const {
14645 // We only have VSX Vector Square Root.
14646 EVT VT = Op.getValueType();
14647 if (VT != MVT::f64 &&
14648 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14649 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14650
14651 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14652}
14653
14654SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14655 int Enabled, int &RefinementSteps,
14656 bool &UseOneConstNR,
14657 bool Reciprocal) const {
14658 EVT VT = Operand.getValueType();
14659 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14660 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14661 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14662 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14663 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14664 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14665
14666 // The Newton-Raphson computation with a single constant does not provide
14667 // enough accuracy on some CPUs.
14668 UseOneConstNR = !Subtarget.needsTwoConstNR();
14669 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14670 }
14671 return SDValue();
14672}
14673
14674SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14675 int Enabled,
14676 int &RefinementSteps) const {
14677 EVT VT = Operand.getValueType();
14678 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14679 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14680 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14681 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14682 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14683 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14684 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14685 }
14686 return SDValue();
14687}
14688
14689unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14690 // Note: This functionality is used only when unsafe-fp-math is enabled, and
14691 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
14692 // enabled for division), this functionality is redundant with the default
14693 // combiner logic (once the division -> reciprocal/multiply transformation
14694 // has taken place). As a result, this matters more for older cores than for
14695 // newer ones.
14696
14697 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14698 // reciprocal if there are two or more FDIVs (for embedded cores with only
14699 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
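// For example, a/c + b/c on an e500 core (threshold 2) becomes
// r = 1.0/c; a*r + b*r, trading two divides for one divide and two
// multiplies.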
14700 switch (Subtarget.getCPUDirective()) {
14701 default:
14702 return 3;
14703 case PPC::DIR_440:
14704 case PPC::DIR_A2:
14705 case PPC::DIR_E500:
14706 case PPC::DIR_E500mc:
14707 case PPC::DIR_E5500:
14708 return 2;
14709 }
14710}
14711
14712 // isConsecutiveLSLoc needs to work even if all adds have not yet been
14713 // collapsed, and so we need to look through chains of them.
14714 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14715 int64_t& Offset, SelectionDAG &DAG) {
14716 if (DAG.isBaseWithConstantOffset(Loc)) {
14717 Base = Loc.getOperand(0);
14718 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14719
14720 // The base might itself be a base plus an offset, and if so, accumulate
14721 // that as well.
14722 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14723 }
14724}
14725
14726 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14727 unsigned Bytes, int Dist,
14728 SelectionDAG &DAG) {
14729 if (VT.getSizeInBits() / 8 != Bytes)
14730 return false;
14731
14732 SDValue BaseLoc = Base->getBasePtr();
14733 if (Loc.getOpcode() == ISD::FrameIndex) {
14734 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14735 return false;
14736 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14737 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14738 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14739 int FS = MFI.getObjectSize(FI);
14740 int BFS = MFI.getObjectSize(BFI);
14741 if (FS != BFS || FS != (int)Bytes) return false;
14742 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14743 }
14744
14745 SDValue Base1 = Loc, Base2 = BaseLoc;
14746 int64_t Offset1 = 0, Offset2 = 0;
14747 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14748 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14749 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14750 return true;
14751
14752 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14753 const GlobalValue *GV1 = nullptr;
14754 const GlobalValue *GV2 = nullptr;
14755 Offset1 = 0;
14756 Offset2 = 0;
14757 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14758 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14759 if (isGA1 && isGA2 && GV1 == GV2)
14760 return Offset1 == (Offset2 + Dist*Bytes);
14761 return false;
14762}
14763
14764// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14765 // not enforce equality of the chain operands.
14766 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14767 unsigned Bytes, int Dist,
14768 SelectionDAG &DAG) {
14769 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14770 EVT VT = LS->getMemoryVT();
14771 SDValue Loc = LS->getBasePtr();
14772 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14773 }
14774
14775 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14776 EVT VT;
14777 switch (N->getConstantOperandVal(1)) {
14778 default: return false;
14779 case Intrinsic::ppc_altivec_lvx:
14780 case Intrinsic::ppc_altivec_lvxl:
14781 case Intrinsic::ppc_vsx_lxvw4x:
14782 case Intrinsic::ppc_vsx_lxvw4x_be:
14783 VT = MVT::v4i32;
14784 break;
14785 case Intrinsic::ppc_vsx_lxvd2x:
14786 case Intrinsic::ppc_vsx_lxvd2x_be:
14787 VT = MVT::v2f64;
14788 break;
14789 case Intrinsic::ppc_altivec_lvebx:
14790 VT = MVT::i8;
14791 break;
14792 case Intrinsic::ppc_altivec_lvehx:
14793 VT = MVT::i16;
14794 break;
14795 case Intrinsic::ppc_altivec_lvewx:
14796 VT = MVT::i32;
14797 break;
14798 }
14799
14800 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14801 }
14802
14803 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14804 EVT VT;
14805 switch (N->getConstantOperandVal(1)) {
14806 default: return false;
14807 case Intrinsic::ppc_altivec_stvx:
14808 case Intrinsic::ppc_altivec_stvxl:
14809 case Intrinsic::ppc_vsx_stxvw4x:
14810 VT = MVT::v4i32;
14811 break;
14812 case Intrinsic::ppc_vsx_stxvd2x:
14813 VT = MVT::v2f64;
14814 break;
14815 case Intrinsic::ppc_vsx_stxvw4x_be:
14816 VT = MVT::v4i32;
14817 break;
14818 case Intrinsic::ppc_vsx_stxvd2x_be:
14819 VT = MVT::v2f64;
14820 break;
14821 case Intrinsic::ppc_altivec_stvebx:
14822 VT = MVT::i8;
14823 break;
14824 case Intrinsic::ppc_altivec_stvehx:
14825 VT = MVT::i16;
14826 break;
14827 case Intrinsic::ppc_altivec_stvewx:
14828 VT = MVT::i32;
14829 break;
14830 }
14831
14832 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14833 }
14834
14835 return false;
14836}
14837
14838 // Return true if there is a nearby consecutive load to the one provided
14839 // (regardless of alignment). We search up and down the chain, looking through
14840 // token factors and other loads (but nothing else). As a result, a true result
14841 // indicates that it is safe to create a new consecutive load adjacent to the
14842 // load provided.
14843 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14844 SDValue Chain = LD->getChain();
14845 EVT VT = LD->getMemoryVT();
14846
14847 SmallPtrSet<SDNode *, 16> LoadRoots;
14848 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14849 SmallPtrSet<SDNode *, 16> Visited;
14850
14851 // First, search up the chain, branching to follow all token-factor operands.
14852 // If we find a consecutive load, then we're done, otherwise, record all
14853 // nodes just above the top-level loads and token factors.
14854 while (!Queue.empty()) {
14855 SDNode *ChainNext = Queue.pop_back_val();
14856 if (!Visited.insert(ChainNext).second)
14857 continue;
14858
14859 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14860 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14861 return true;
14862
14863 if (!Visited.count(ChainLD->getChain().getNode()))
14864 Queue.push_back(ChainLD->getChain().getNode());
14865 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14866 for (const SDUse &O : ChainNext->ops())
14867 if (!Visited.count(O.getNode()))
14868 Queue.push_back(O.getNode());
14869 } else
14870 LoadRoots.insert(ChainNext);
14871 }
14872
14873 // Second, search down the chain, starting from the top-level nodes recorded
14874 // in the first phase. These top-level nodes are the nodes just above all
14875 // loads and token factors. Starting with their uses, recursively look through
14876 // all loads (just the chain uses) and token factors to find a consecutive
14877 // load.
14878 Visited.clear();
14879 Queue.clear();
14880
14881 for (SDNode *I : LoadRoots) {
14882 Queue.push_back(I);
14883
14884 while (!Queue.empty()) {
14885 SDNode *LoadRoot = Queue.pop_back_val();
14886 if (!Visited.insert(LoadRoot).second)
14887 continue;
14888
14889 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14890 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14891 return true;
14892
14893 for (SDNode *U : LoadRoot->users())
14894 if (((isa<MemSDNode>(U) &&
14895 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14896 U->getOpcode() == ISD::TokenFactor) &&
14897 !Visited.count(U))
14898 Queue.push_back(U);
14899 }
14900 }
14901
14902 return false;
14903}
14904
14905/// This function is called when we have proved that a SETCC node can be replaced
14906 /// by subtraction (and other supporting instructions) so that the result of
14907 /// the comparison is kept in a GPR instead of a CR. This function is purely
14908 /// for codegen purposes and has some flags to guide the codegen process.
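// For example, for a 32-bit unsigned x < y both operands are zero extended
// to 64 bits and subtracted; bit 63 of x - y is set exactly when x < y, so
// a logical shift right by 63 leaves the comparison result in the low bit.
// SETULE, SETUGT and SETUGE are obtained by swapping the operands and/or
// complementing that bit (see ConvertSETCCToSubtract below).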
14909static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14910 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14911 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14912
14913 // Zero extend the operands to the largest legal integer. The original
14914 // operands must be of a strictly smaller size.
14915 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14916 DAG.getConstant(Size, DL, MVT::i32));
14917 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14918 DAG.getConstant(Size, DL, MVT::i32));
14919
14920 // Swap if needed. Depends on the condition code.
14921 if (Swap)
14922 std::swap(Op0, Op1);
14923
14924 // Subtract extended integers.
14925 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14926
14927 // Move the sign bit to the least significant position and zero out the rest.
14928 // Now the least significant bit carries the result of the original comparison.
14929 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14930 DAG.getConstant(Size - 1, DL, MVT::i32));
14931 auto Final = Shifted;
14932
14933 // Complement the result if needed. Based on the condition code.
14934 if (Complement)
14935 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14936 DAG.getConstant(1, DL, MVT::i64));
14937
14938 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14939}
14940
14941SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14942 DAGCombinerInfo &DCI) const {
14943 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14944
14945 SelectionDAG &DAG = DCI.DAG;
14946 SDLoc DL(N);
14947
14948 // Size of integers being compared has a critical role in the following
14949 // analysis, so we prefer to do this when all types are legal.
14950 if (!DCI.isAfterLegalizeDAG())
14951 return SDValue();
14952
14953 // If all users of the SETCC extend its value to a legal integer type,
14954 // then we replace the SETCC with a subtraction.
14955 for (const SDNode *U : N->users())
14956 if (U->getOpcode() != ISD::ZERO_EXTEND)
14957 return SDValue();
14958
14959 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14960 auto OpSize = N->getOperand(0).getValueSizeInBits();
14961
14962 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14963
14964 if (OpSize < Size) {
14965 switch (CC) {
14966 default: break;
14967 case ISD::SETULT:
14968 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14969 case ISD::SETULE:
14970 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14971 case ISD::SETUGT:
14972 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14973 case ISD::SETUGE:
14974 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14975 }
14976 }
14977
14978 return SDValue();
14979}
14980
14981SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14982 DAGCombinerInfo &DCI) const {
14983 SelectionDAG &DAG = DCI.DAG;
14984 SDLoc dl(N);
14985
14986 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14987 // If we're tracking CR bits, we need to be careful that we don't have:
14988 // trunc(binary-ops(zext(x), zext(y)))
14989 // or
14990 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14991 // such that we're unnecessarily moving things into GPRs when it would be
14992 // better to keep them in CR bits.
14993
14994 // Note that trunc here can be an actual i1 trunc, or can be the effective
14995 // truncation that comes from a setcc or select_cc.
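// For example, trunc(xor(zext(a), zext(b))) with i1 values a and b is
// rewritten so the xor operates directly on the i1 values (kept in CR
// bits), and the surrounding zext/trunc pair disappears.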
14996 if (N->getOpcode() == ISD::TRUNCATE &&
14997 N->getValueType(0) != MVT::i1)
14998 return SDValue();
14999
15000 if (N->getOperand(0).getValueType() != MVT::i32 &&
15001 N->getOperand(0).getValueType() != MVT::i64)
15002 return SDValue();
15003
15004 if (N->getOpcode() == ISD::SETCC ||
15005 N->getOpcode() == ISD::SELECT_CC) {
15006 // If we're looking at a comparison, then we need to make sure that the
15007 // high bits (all except for the first) don't affect the result.
15008 ISD::CondCode CC =
15009 cast<CondCodeSDNode>(N->getOperand(
15010 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15011 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15012
15013 if (ISD::isSignedIntSetCC(CC)) {
15014 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15015 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15016 return SDValue();
15017 } else if (ISD::isUnsignedIntSetCC(CC)) {
15018 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15019 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15020 !DAG.MaskedValueIsZero(N->getOperand(1),
15021 APInt::getHighBitsSet(OpBits, OpBits-1)))
15022 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15023 : SDValue());
15024 } else {
15025 // This is neither a signed nor an unsigned comparison; just make sure
15026 // that the high bits are equal.
15027 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15028 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15029
15030 // We don't really care about what is known about the first bit (if
15031 // anything), so pretend that it is known zero for both to ensure they can
15032 // be compared as constants.
15033 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15034 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15035
15036 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15037 Op1Known.getConstant() != Op2Known.getConstant())
15038 return SDValue();
15039 }
15040 }
15041
15042 // We now know that the higher-order bits are irrelevant, we just need to
15043 // make sure that all of the intermediate operations are bit operations, and
15044 // all inputs are extensions.
15045 if (N->getOperand(0).getOpcode() != ISD::AND &&
15046 N->getOperand(0).getOpcode() != ISD::OR &&
15047 N->getOperand(0).getOpcode() != ISD::XOR &&
15048 N->getOperand(0).getOpcode() != ISD::SELECT &&
15049 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15050 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15051 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15052 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15053 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15054 return SDValue();
15055
15056 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15057 N->getOperand(1).getOpcode() != ISD::AND &&
15058 N->getOperand(1).getOpcode() != ISD::OR &&
15059 N->getOperand(1).getOpcode() != ISD::XOR &&
15060 N->getOperand(1).getOpcode() != ISD::SELECT &&
15061 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15062 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15063 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15064 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15065 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15066 return SDValue();
15067
15068 SmallVector<SDValue, 4> Inputs;
15069 SmallVector<SDValue, 8> BinOps, PromOps;
15070 SmallPtrSet<SDNode *, 16> Visited;
15071
15072 for (unsigned i = 0; i < 2; ++i) {
15073 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15074 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15075 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15076 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15077 isa<ConstantSDNode>(N->getOperand(i)))
15078 Inputs.push_back(N->getOperand(i));
15079 else
15080 BinOps.push_back(N->getOperand(i));
15081
15082 if (N->getOpcode() == ISD::TRUNCATE)
15083 break;
15084 }
15085
15086 // Visit all inputs, collect all binary operations (and, or, xor and
15087 // select) that are all fed by extensions.
15088 while (!BinOps.empty()) {
15089 SDValue BinOp = BinOps.pop_back_val();
15090
15091 if (!Visited.insert(BinOp.getNode()).second)
15092 continue;
15093
15094 PromOps.push_back(BinOp);
15095
15096 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15097 // The condition of the select is not promoted.
15098 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15099 continue;
15100 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15101 continue;
15102
15103 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15104 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15105 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15106 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15107 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15108 Inputs.push_back(BinOp.getOperand(i));
15109 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15110 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15111 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15112 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15113 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15114 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15115 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15116 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15117 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15118 BinOps.push_back(BinOp.getOperand(i));
15119 } else {
15120 // We have an input that is not an extension or another binary
15121 // operation; we'll abort this transformation.
15122 return SDValue();
15123 }
15124 }
15125 }
15126
15127 // Make sure that this is a self-contained cluster of operations (which
15128 // is not quite the same thing as saying that everything has only one
15129 // use).
15130 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15131 if (isa<ConstantSDNode>(Inputs[i]))
15132 continue;
15133
15134 for (const SDNode *User : Inputs[i].getNode()->users()) {
15135 if (User != N && !Visited.count(User))
15136 return SDValue();
15137
15138 // Make sure that we're not going to promote the non-output-value
15139 // operand(s) or SELECT or SELECT_CC.
15140 // FIXME: Although we could sometimes handle this, and it does occur in
15141 // practice that one of the condition inputs to the select is also one of
15142 // the outputs, we currently can't deal with this.
15143 if (User->getOpcode() == ISD::SELECT) {
15144 if (User->getOperand(0) == Inputs[i])
15145 return SDValue();
15146 } else if (User->getOpcode() == ISD::SELECT_CC) {
15147 if (User->getOperand(0) == Inputs[i] ||
15148 User->getOperand(1) == Inputs[i])
15149 return SDValue();
15150 }
15151 }
15152 }
15153
15154 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15155 for (const SDNode *User : PromOps[i].getNode()->users()) {
15156 if (User != N && !Visited.count(User))
15157 return SDValue();
15158
15159 // Make sure that we're not going to promote the non-output-value
15160 // operand(s) or SELECT or SELECT_CC.
15161 // FIXME: Although we could sometimes handle this, and it does occur in
15162 // practice that one of the condition inputs to the select is also one of
15163 // the outputs, we currently can't deal with this.
15164 if (User->getOpcode() == ISD::SELECT) {
15165 if (User->getOperand(0) == PromOps[i])
15166 return SDValue();
15167 } else if (User->getOpcode() == ISD::SELECT_CC) {
15168 if (User->getOperand(0) == PromOps[i] ||
15169 User->getOperand(1) == PromOps[i])
15170 return SDValue();
15171 }
15172 }
15173 }
15174
15175 // Replace all inputs with the extension operand.
15176 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15177 // Constants may have users outside the cluster of to-be-promoted nodes,
15178 // and so we need to replace those as we do the promotions.
15179 if (isa<ConstantSDNode>(Inputs[i]))
15180 continue;
15181 else
15182 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15183 }
15184
15185 std::list<HandleSDNode> PromOpHandles;
15186 for (auto &PromOp : PromOps)
15187 PromOpHandles.emplace_back(PromOp);
15188
15189 // Replace all operations (these are all the same, but have a different
15190 // (i1) return type). DAG.getNode will validate that the types of
15191 // a binary operator match, so go through the list in reverse so that
15192 // we've likely promoted both operands first. Any intermediate truncations or
15193 // extensions disappear.
15194 while (!PromOpHandles.empty()) {
15195 SDValue PromOp = PromOpHandles.back().getValue();
15196 PromOpHandles.pop_back();
15197
15198 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15199 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15200 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15201 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15202 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15203 PromOp.getOperand(0).getValueType() != MVT::i1) {
15204 // The operand is not yet ready (see comment below).
15205 PromOpHandles.emplace_front(PromOp);
15206 continue;
15207 }
15208
15209 SDValue RepValue = PromOp.getOperand(0);
15210 if (isa<ConstantSDNode>(RepValue))
15211 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15212
15213 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15214 continue;
15215 }
15216
15217 unsigned C;
15218 switch (PromOp.getOpcode()) {
15219 default: C = 0; break;
15220 case ISD::SELECT: C = 1; break;
15221 case ISD::SELECT_CC: C = 2; break;
15222 }
15223
15224 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15225 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15226 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15227 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15228 // The to-be-promoted operands of this node have not yet been
15229 // promoted (this should be rare because we're going through the
15230 // list backward, but if one of the operands has several users in
15231 // this cluster of to-be-promoted nodes, it is possible).
15232 PromOpHandles.emplace_front(PromOp);
15233 continue;
15234 }
15235
15236 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15237
15238 // If there are any constant inputs, make sure they're replaced now.
15239 for (unsigned i = 0; i < 2; ++i)
15240 if (isa<ConstantSDNode>(Ops[C+i]))
15241 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15242
15243 DAG.ReplaceAllUsesOfValueWith(PromOp,
15244 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15245 }
15246
15247 // Now we're left with the initial truncation itself.
15248 if (N->getOpcode() == ISD::TRUNCATE)
15249 return N->getOperand(0);
15250
15251 // Otherwise, this is a comparison. The operands to be compared have just
15252 // changed type (to i1), but everything else is the same.
15253 return SDValue(N, 0);
15254}
15255
15256SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15257 DAGCombinerInfo &DCI) const {
15258 SelectionDAG &DAG = DCI.DAG;
15259 SDLoc dl(N);
15260
15261 // If we're tracking CR bits, we need to be careful that we don't have:
15262 // zext(binary-ops(trunc(x), trunc(y)))
15263 // or
15264 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15265 // such that we're unnecessarily moving things into CR bits that can more
15266 // efficiently stay in GPRs. Note that if we're not certain that the high
15267 // bits are set as required by the final extension, we still may need to do
15268 // some masking to get the proper behavior.
15269
15270 // This same functionality is important on PPC64 when dealing with
15271 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15272 // the return values of functions. Because it is so similar, it is handled
15273 // here as well.
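// For example, zext(and(trunc(x), trunc(y))) with i64 values x and y is
// rewritten to perform the and directly on x and y; a final mask (for zext)
// or shl/sra pair (for sext) is emitted only when the high bits are not
// already known to be correct.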
15274
15275 if (N->getValueType(0) != MVT::i32 &&
15276 N->getValueType(0) != MVT::i64)
15277 return SDValue();
15278
15279 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15280 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15281 return SDValue();
15282
15283 if (N->getOperand(0).getOpcode() != ISD::AND &&
15284 N->getOperand(0).getOpcode() != ISD::OR &&
15285 N->getOperand(0).getOpcode() != ISD::XOR &&
15286 N->getOperand(0).getOpcode() != ISD::SELECT &&
15287 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15288 return SDValue();
15289
15291 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15293
15294 // Visit all inputs, collect all binary operations (and, or, xor and
15295 // select) that are all fed by truncations.
15296 while (!BinOps.empty()) {
15297 SDValue BinOp = BinOps.pop_back_val();
15298
15299 if (!Visited.insert(BinOp.getNode()).second)
15300 continue;
15301
15302 PromOps.push_back(BinOp);
15303
15304 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15305 // The condition of the select is not promoted.
15306 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15307 continue;
15308 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15309 continue;
15310
15311 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15312 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15313 Inputs.push_back(BinOp.getOperand(i));
15314 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15315 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15316 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15317 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15318 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15319 BinOps.push_back(BinOp.getOperand(i));
15320 } else {
15321 // We have an input that is not a truncation or another binary
15322 // operation; we'll abort this transformation.
15323 return SDValue();
15324 }
15325 }
15326 }
15327
15328 // The operands of a select that must be truncated when the select is
15329 // promoted because the operand is actually part of the to-be-promoted set.
15330 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15331
15332 // Make sure that this is a self-contained cluster of operations (which
15333 // is not quite the same thing as saying that everything has only one
15334 // use).
15335 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15336 if (isa<ConstantSDNode>(Inputs[i]))
15337 continue;
15338
15339 for (SDNode *User : Inputs[i].getNode()->users()) {
15340 if (User != N && !Visited.count(User))
15341 return SDValue();
15342
15343 // If we're going to promote the non-output-value operand(s) or SELECT or
15344 // SELECT_CC, record them for truncation.
15345 if (User->getOpcode() == ISD::SELECT) {
15346 if (User->getOperand(0) == Inputs[i])
15347 SelectTruncOp[0].insert(std::make_pair(User,
15348 User->getOperand(0).getValueType()));
15349 } else if (User->getOpcode() == ISD::SELECT_CC) {
15350 if (User->getOperand(0) == Inputs[i])
15351 SelectTruncOp[0].insert(std::make_pair(User,
15352 User->getOperand(0).getValueType()));
15353 if (User->getOperand(1) == Inputs[i])
15354 SelectTruncOp[1].insert(std::make_pair(User,
15355 User->getOperand(1).getValueType()));
15356 }
15357 }
15358 }
15359
15360 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15361 for (SDNode *User : PromOps[i].getNode()->users()) {
15362 if (User != N && !Visited.count(User))
15363 return SDValue();
15364
15365 // If we're going to promote the non-output-value operand(s) or SELECT or
15366 // SELECT_CC, record them for truncation.
15367 if (User->getOpcode() == ISD::SELECT) {
15368 if (User->getOperand(0) == PromOps[i])
15369 SelectTruncOp[0].insert(std::make_pair(User,
15370 User->getOperand(0).getValueType()));
15371 } else if (User->getOpcode() == ISD::SELECT_CC) {
15372 if (User->getOperand(0) == PromOps[i])
15373 SelectTruncOp[0].insert(std::make_pair(User,
15374 User->getOperand(0).getValueType()));
15375 if (User->getOperand(1) == PromOps[i])
15376 SelectTruncOp[1].insert(std::make_pair(User,
15377 User->getOperand(1).getValueType()));
15378 }
15379 }
15380 }
15381
15382 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15383 bool ReallyNeedsExt = false;
15384 if (N->getOpcode() != ISD::ANY_EXTEND) {
15385 // If any of the inputs is not already sign/zero extended, then
15386 // we'll still need to do that at the end.
15387 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15388 if (isa<ConstantSDNode>(Inputs[i]))
15389 continue;
15390
15391 unsigned OpBits =
15392 Inputs[i].getOperand(0).getValueSizeInBits();
15393 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15394
15395 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15396 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15397 APInt::getHighBitsSet(OpBits,
15398 OpBits-PromBits))) ||
15399 (N->getOpcode() == ISD::SIGN_EXTEND &&
15400 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15401 (OpBits-(PromBits-1)))) {
15402 ReallyNeedsExt = true;
15403 break;
15404 }
15405 }
15406 }
15407
15408 // Replace all inputs, either with the truncation operand, or a
15409 // truncation or extension to the final output type.
15410 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15411 // Constant inputs need to be replaced with the to-be-promoted nodes that
15412 // use them because they might have users outside of the cluster of
15413 // promoted nodes.
15414 if (isa<ConstantSDNode>(Inputs[i]))
15415 continue;
15416
15417 SDValue InSrc = Inputs[i].getOperand(0);
15418 if (Inputs[i].getValueType() == N->getValueType(0))
15419 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15420 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15421 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15422 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15423 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15424 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15425 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15426 else
15427 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15428 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15429 }
15430
15431 std::list<HandleSDNode> PromOpHandles;
15432 for (auto &PromOp : PromOps)
15433 PromOpHandles.emplace_back(PromOp);
15434
15435 // Replace all operations (these are all the same, but have a different
15436 // (promoted) return type). DAG.getNode will validate that the types of
15437 // a binary operator match, so go through the list in reverse so that
15438 // we've likely promoted both operands first.
15439 while (!PromOpHandles.empty()) {
15440 SDValue PromOp = PromOpHandles.back().getValue();
15441 PromOpHandles.pop_back();
15442
15443 unsigned C;
15444 switch (PromOp.getOpcode()) {
15445 default: C = 0; break;
15446 case ISD::SELECT: C = 1; break;
15447 case ISD::SELECT_CC: C = 2; break;
15448 }
15449
15450 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15451 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15452 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15453 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15454 // The to-be-promoted operands of this node have not yet been
15455 // promoted (this should be rare because we're going through the
15456 // list backward, but if one of the operands has several users in
15457 // this cluster of to-be-promoted nodes, it is possible).
15458 PromOpHandles.emplace_front(PromOp);
15459 continue;
15460 }
15461
15462 // For SELECT and SELECT_CC nodes, we do a similar check for any
15463 // to-be-promoted comparison inputs.
15464 if (PromOp.getOpcode() == ISD::SELECT ||
15465 PromOp.getOpcode() == ISD::SELECT_CC) {
15466 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15467 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15468 (SelectTruncOp[1].count(PromOp.getNode()) &&
15469 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15470 PromOpHandles.emplace_front(PromOp);
15471 continue;
15472 }
15473 }
15474
15475 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15476
15477 // If this node has constant inputs, then they'll need to be promoted here.
15478 for (unsigned i = 0; i < 2; ++i) {
15479 if (!isa<ConstantSDNode>(Ops[C+i]))
15480 continue;
15481 if (Ops[C+i].getValueType() == N->getValueType(0))
15482 continue;
15483
15484 if (N->getOpcode() == ISD::SIGN_EXTEND)
15485 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15486 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15487 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15488 else
15489 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15490 }
15491
15492 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15493 // truncate them again to the original value type.
15494 if (PromOp.getOpcode() == ISD::SELECT ||
15495 PromOp.getOpcode() == ISD::SELECT_CC) {
15496 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15497 if (SI0 != SelectTruncOp[0].end())
15498 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15499 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15500 if (SI1 != SelectTruncOp[1].end())
15501 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15502 }
15503
15504 DAG.ReplaceAllUsesOfValueWith(PromOp,
15505 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15506 }
15507
15508 // Now we're left with the initial extension itself.
15509 if (!ReallyNeedsExt)
15510 return N->getOperand(0);
15511
15512 // To zero extend, just mask off everything except for the first bit (in the
15513 // i1 case).
15514 if (N->getOpcode() == ISD::ZERO_EXTEND)
15515 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15516 DAG.getConstant(APInt::getLowBitsSet(
15517 N->getValueSizeInBits(0), PromBits),
15518 dl, N->getValueType(0)));
15519
15520 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15521 "Invalid extension type");
15522 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15523 SDValue ShiftCst =
15524 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15525 return DAG.getNode(
15526 ISD::SRA, dl, N->getValueType(0),
15527 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15528 ShiftCst);
15529}
15530
15531SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15532 DAGCombinerInfo &DCI) const {
15533 assert(N->getOpcode() == ISD::SETCC &&
15534 "Should be called with a SETCC node");
15535
15536 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15537 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15538 SDValue LHS = N->getOperand(0);
15539 SDValue RHS = N->getOperand(1);
15540
15541 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15542 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15543 LHS.hasOneUse())
15544 std::swap(LHS, RHS);
15545
15546 // x == 0-y --> x+y == 0
15547 // x != 0-y --> x+y != 0
15548 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15549 RHS.hasOneUse()) {
15550 SDLoc DL(N);
15551 SelectionDAG &DAG = DCI.DAG;
15552 EVT VT = N->getValueType(0);
15553 EVT OpVT = LHS.getValueType();
15554 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15555 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15556 }
15557 }
15558
15559 return DAGCombineTruncBoolExt(N, DCI);
15560}
15561
15562// Is this an extending load from an f32 to an f64?
15563static bool isFPExtLoad(SDValue Op) {
15564 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15565 return LD->getExtensionType() == ISD::EXTLOAD &&
15566 Op.getValueType() == MVT::f64;
15567 return false;
15568}
15569
15570/// Reduces the number of fp-to-int conversion when building a vector.
15571///
15572/// If this vector is built out of floating to integer conversions,
15573/// transform it to a vector built out of floating point values followed by a
15574/// single floating to integer conversion of the vector.
15575/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15576/// becomes (fptosi (build_vector ($A, $B, ...)))
15577SDValue PPCTargetLowering::
15578combineElementTruncationToVectorTruncation(SDNode *N,
15579 DAGCombinerInfo &DCI) const {
15580 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15581 "Should be called with a BUILD_VECTOR node");
15582
15583 SelectionDAG &DAG = DCI.DAG;
15584 SDLoc dl(N);
15585
15586 SDValue FirstInput = N->getOperand(0);
15587 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15588 "The input operand must be an fp-to-int conversion.");
15589
15590 // This combine happens after legalization so the fp_to_[su]i nodes are
15591 // already converted to PPCISD nodes.
15592 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15593 if (FirstConversion == PPCISD::FCTIDZ ||
15594 FirstConversion == PPCISD::FCTIDUZ ||
15595 FirstConversion == PPCISD::FCTIWZ ||
15596 FirstConversion == PPCISD::FCTIWUZ) {
15597 bool IsSplat = true;
15598 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15599 FirstConversion == PPCISD::FCTIWUZ;
15600 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15601 SmallVector<SDValue, 4> Ops;
15602 EVT TargetVT = N->getValueType(0);
15603 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15604 SDValue NextOp = N->getOperand(i);
15605 if (NextOp.getOpcode() != PPCISD::MFVSR)
15606 return SDValue();
15607 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15608 if (NextConversion != FirstConversion)
15609 return SDValue();
15610 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15611 // This is not valid if the input was originally double precision. It is
15612 // also not profitable to do unless this is an extending load in which
15613 // case doing this combine will allow us to combine consecutive loads.
15614 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15615 return SDValue();
15616 if (N->getOperand(i) != FirstInput)
15617 IsSplat = false;
15618 }
15619
15620 // If this is a splat, we leave it as-is since there will be only a single
15621 // fp-to-int conversion followed by a splat of the integer. This is better
15622 // for 32-bit and smaller ints and neutral for 64-bit ints.
15623 if (IsSplat)
15624 return SDValue();
15625
15626 // Now that we know we have the right type of node, get its operands
15627 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15628 SDValue In = N->getOperand(i).getOperand(0);
15629 if (Is32Bit) {
15630 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15631 // here, we know that all inputs are extending loads so this is safe).
15632 if (In.isUndef())
15633 Ops.push_back(DAG.getUNDEF(SrcVT));
15634 else {
15635 SDValue Trunc =
15636 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15637 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15638 Ops.push_back(Trunc);
15639 }
15640 } else
15641 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15642 }
15643
15644 unsigned Opcode;
15645 if (FirstConversion == PPCISD::FCTIDZ ||
15646 FirstConversion == PPCISD::FCTIWZ)
15647 Opcode = ISD::FP_TO_SINT;
15648 else
15649 Opcode = ISD::FP_TO_UINT;
15650
15651 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15652 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15653 return DAG.getNode(Opcode, dl, TargetVT, BV);
15654 }
15655 return SDValue();
15656}
15657
15658/// Reduce the number of loads when building a vector.
15659///
15660/// Building a vector out of multiple loads can be converted to a load
15661/// of the vector type if the loads are consecutive. If the loads are
15662/// consecutive but in descending order, a shuffle is added at the end
15663/// to reorder the vector.
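/// For illustration, assuming four consecutive i32 loads starting at p:
///   (build_vector (load p), (load p+4), (load p+8), (load p+12))
/// becomes a single vector load from p; if the addresses instead descend,
/// a reversing vector_shuffle is appended to the wide load.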
15664 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15665 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15666 "Should be called with a BUILD_VECTOR node");
15667
15668 SDLoc dl(N);
15669
15670 // Return early for non-byte-sized types, as they can't be consecutive.
15671 if (!N->getValueType(0).getVectorElementType().isByteSized())
15672 return SDValue();
15673
15674 bool InputsAreConsecutiveLoads = true;
15675 bool InputsAreReverseConsecutive = true;
15676 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15677 SDValue FirstInput = N->getOperand(0);
15678 bool IsRoundOfExtLoad = false;
15679 LoadSDNode *FirstLoad = nullptr;
15680
15681 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15682 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15683 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15684 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15685 }
15686 // Not a build vector of (possibly fp_rounded) loads.
15687 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15688 N->getNumOperands() == 1)
15689 return SDValue();
15690
15691 if (!IsRoundOfExtLoad)
15692 FirstLoad = cast<LoadSDNode>(FirstInput);
15693
15694 SmallVector<LoadSDNode *, 4> InputLoads;
15695 InputLoads.push_back(FirstLoad);
15696 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15697 // If any inputs are fp_round(extload), they all must be.
15698 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15699 return SDValue();
15700
15701 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15702 N->getOperand(i);
15703 if (NextInput.getOpcode() != ISD::LOAD)
15704 return SDValue();
15705
15706 SDValue PreviousInput =
15707 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15708 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15709 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15710
15711 // If any inputs are fp_round(extload), they all must be.
15712 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15713 return SDValue();
15714
15715 // We only care about regular loads. The PPC-specific load intrinsics
15716 // will not lead to a merge opportunity.
15717 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15718 InputsAreConsecutiveLoads = false;
15719 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15720 InputsAreReverseConsecutive = false;
15721
15722 // Exit early if the loads are neither consecutive nor reverse consecutive.
15723 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15724 return SDValue();
15725 InputLoads.push_back(LD2);
15726 }
15727
15728 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15729 "The loads cannot be both consecutive and reverse consecutive.");
15730
15731 SDValue WideLoad;
15732 SDValue ReturnSDVal;
15733 if (InputsAreConsecutiveLoads) {
15734 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15735 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15736 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15737 FirstLoad->getAlign());
15738 ReturnSDVal = WideLoad;
15739 } else if (InputsAreReverseConsecutive) {
15740 LoadSDNode *LastLoad = InputLoads.back();
15741 assert(LastLoad && "Input needs to be a LoadSDNode.");
15742 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15743 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15744 LastLoad->getAlign());
15745 SmallVector<int, 16> Ops;
15746 for (int i = N->getNumOperands() - 1; i >= 0; i--)
15747 Ops.push_back(i);
15748
15749 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
15750 DAG.getUNDEF(N->getValueType(0)), Ops);
15751 } else
15752 return SDValue();
15753
15754 for (auto *LD : InputLoads)
15755 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
15756 return ReturnSDVal;
15757}
15758
15759// This function adds the required vector_shuffle needed to get
15760// the elements of the vector extract in the correct position
15761// as specified by the CorrectElems encoding.
15762 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15763 SDValue Input, uint64_t Elems,
15764 uint64_t CorrectElems) {
15765 SDLoc dl(N);
15766
15767 unsigned NumElems = Input.getValueType().getVectorNumElements();
15768 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15769
15770 // Knowing the element indices being extracted from the original
15771 // vector and the order in which they're being inserted, just put
15772 // them at element indices required for the instruction.
15773 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15774 if (DAG.getDataLayout().isLittleEndian())
15775 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15776 else
15777 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15778 CorrectElems = CorrectElems >> 8;
15779 Elems = Elems >> 8;
15780 }
15781
15782 SDValue Shuffle =
15783 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
15784 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
15785
15786 EVT VT = N->getValueType(0);
15787 SDValue Conv = DAG.getBitcast(VT, Shuffle);
15788
15789 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
15790 Input.getValueType().getVectorElementType(),
15791 VT.getVectorNumElements());
15792 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
15793 DAG.getValueType(ExtVT));
15794}
15795
15796// Look for build vector patterns where input operands come from sign
15797// extended vector_extract elements of specific indices. If the correct indices
15798// aren't used, add a vector shuffle to fix up the indices and create
15799// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
15800// during instruction selection.
15801 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15802 // This array encodes the indices that the vector sign extend instructions
15803 // extract from when extending from one type to another for both BE and LE.
15804 // The right nibble of each byte corresponds to the LE indices,
15805 // and the left nibble of each byte corresponds to the BE indices.
15806 // For example: 0x3074B8FC byte->word
15807 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15808 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15809 // For example: 0x000070F8 byte->double word
15810 // For LE: the allowed indices are: 0x0,0x8
15811 // For BE: the allowed indices are: 0x7,0xF
15812 uint64_t TargetElems[] = {
15813 0x3074B8FC, // b->w
15814 0x000070F8, // b->d
15815 0x10325476, // h->w
15816 0x00003074, // h->d
15817 0x00001032, // w->d
15818 };
15819
15820 uint64_t Elems = 0;
15821 int Index;
15822 SDValue Input;
15823
15824 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15825 if (!Op)
15826 return false;
15827 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15828 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15829 return false;
15830
15831 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15832 // of the right width.
15833 SDValue Extract = Op.getOperand(0);
15834 if (Extract.getOpcode() == ISD::ANY_EXTEND)
15835 Extract = Extract.getOperand(0);
15836 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15837 return false;
15838
15839 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
15840 if (!ExtOp)
15841 return false;
15842
15843 Index = ExtOp->getZExtValue();
15844 if (Input && Input != Extract.getOperand(0))
15845 return false;
15846
15847 if (!Input)
15848 Input = Extract.getOperand(0);
15849
15850 Elems = Elems << 8;
15851 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15852 Elems |= Index;
15853
15854 return true;
15855 };
15856
15857 // If the build vector operands aren't sign-extended vector extracts
15858 // of the same input vector, then return.
15859 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15860 if (!isSExtOfVecExtract(N->getOperand(i))) {
15861 return SDValue();
15862 }
15863 }
15864
15865 // If the vector extract indices are not correct, add the appropriate
15866 // vector_shuffle.
15867 int TgtElemArrayIdx;
15868 int InputSize = Input.getValueType().getScalarSizeInBits();
15869 int OutputSize = N->getValueType(0).getScalarSizeInBits();
15870 if (InputSize + OutputSize == 40)
15871 TgtElemArrayIdx = 0;
15872 else if (InputSize + OutputSize == 72)
15873 TgtElemArrayIdx = 1;
15874 else if (InputSize + OutputSize == 48)
15875 TgtElemArrayIdx = 2;
15876 else if (InputSize + OutputSize == 80)
15877 TgtElemArrayIdx = 3;
15878 else if (InputSize + OutputSize == 96)
15879 TgtElemArrayIdx = 4;
15880 else
15881 return SDValue();
15882
15883 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15884 CorrectElems = DAG.getDataLayout().isLittleEndian()
15885 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15886 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15887 if (Elems != CorrectElems) {
15888 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15889 }
15890
15891 // Regular lowering will catch cases where a shuffle is not needed.
15892 return SDValue();
15893}
15894
15895// Look for the pattern of a load from a narrow width to i128, feeding
15896// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15897// (LXVRZX). This node represents a zero extending load that will be matched
15898// to the Load VSX Vector Rightmost instructions.
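// For illustration: (v1i128 (build_vector (i128 (zextload i64 [p])))) should
// become a single LXVRZX node that loads 64 bits from p and zeroes the rest.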
15899 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15900 SDLoc DL(N);
15901
15902 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15903 if (N->getValueType(0) != MVT::v1i128)
15904 return SDValue();
15905
15906 SDValue Operand = N->getOperand(0);
15907 // Proceed with the transformation if the operand to the BUILD_VECTOR
15908 // is a load instruction.
15909 if (Operand.getOpcode() != ISD::LOAD)
15910 return SDValue();
15911
15912 auto *LD = cast<LoadSDNode>(Operand);
15913 EVT MemoryType = LD->getMemoryVT();
15914
15915 // This transformation is only valid if we are loading either a byte,
15916 // halfword, word, or doubleword.
15917 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15918 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15919
15920 // Ensure that the load from the narrow width is being zero extended to i128.
15921 if (!ValidLDType ||
15922 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15923 LD->getExtensionType() != ISD::EXTLOAD))
15924 return SDValue();
15925
15926 SDValue LoadOps[] = {
15927 LD->getChain(), LD->getBasePtr(),
15928 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15929
15930 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15931 DAG.getVTList(MVT::v1i128, MVT::Other),
15932 LoadOps, MemoryType, LD->getMemOperand());
15933}
15934
15935SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15936 DAGCombinerInfo &DCI) const {
15937 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15938 "Should be called with a BUILD_VECTOR node");
15939
15940 SelectionDAG &DAG = DCI.DAG;
15941 SDLoc dl(N);
15942
15943 if (!Subtarget.hasVSX())
15944 return SDValue();
15945
15946 // The target independent DAG combiner will leave a build_vector of
15947 // float-to-int conversions intact. We can generate MUCH better code for
15948 // a float-to-int conversion of a vector of floats.
15949 SDValue FirstInput = N->getOperand(0);
15950 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15951 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15952 if (Reduced)
15953 return Reduced;
15954 }
15955
15956 // If we're building a vector out of consecutive loads, just load that
15957 // vector type.
15958 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15959 if (Reduced)
15960 return Reduced;
15961
15962 // If we're building a vector out of extended elements from another vector
15963 // we have P9 vector integer extend instructions. The code assumes legal
15964 // input types (i.e. it can't handle things like v4i16) so do not run before
15965 // legalization.
15966 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15967 Reduced = combineBVOfVecSExt(N, DAG);
15968 if (Reduced)
15969 return Reduced;
15970 }
15971
15972 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15973 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15974 // is a load from <valid narrow width> to i128.
15975 if (Subtarget.isISA3_1()) {
15976 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15977 if (BVOfZLoad)
15978 return BVOfZLoad;
15979 }
15980
15981 if (N->getValueType(0) != MVT::v2f64)
15982 return SDValue();
15983
15984 // Looking for:
15985 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
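// When both extracts read adjacent elements of the same v4i32 source, the
// whole build_vector collapses into one vector int-to-fp conversion of the
// selected half (the NodeType chosen at the end of this function).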
15986 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15987 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15988 return SDValue();
15989 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15990 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15991 return SDValue();
15992 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15993 return SDValue();
15994
15995 SDValue Ext1 = FirstInput.getOperand(0);
15996 SDValue Ext2 = N->getOperand(1).getOperand(0);
15997 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15998 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15999 return SDValue();
16000
16001 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16002 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16003 if (!Ext1Op || !Ext2Op)
16004 return SDValue();
16005 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16006 Ext1.getOperand(0) != Ext2.getOperand(0))
16007 return SDValue();
16008
16009 int FirstElem = Ext1Op->getZExtValue();
16010 int SecondElem = Ext2Op->getZExtValue();
16011 int SubvecIdx;
16012 if (FirstElem == 0 && SecondElem == 1)
16013 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16014 else if (FirstElem == 2 && SecondElem == 3)
16015 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16016 else
16017 return SDValue();
16018
16019 SDValue SrcVec = Ext1.getOperand(0);
16020 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16021 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16022 return DAG.getNode(NodeType, dl, MVT::v2f64,
16023 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16024}
16025
16026SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16027 DAGCombinerInfo &DCI) const {
16028 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16029 N->getOpcode() == ISD::UINT_TO_FP) &&
16030 "Need an int -> FP conversion node here");
16031
16032 if (useSoftFloat() || !Subtarget.has64BitSupport())
16033 return SDValue();
16034
16035 SelectionDAG &DAG = DCI.DAG;
16036 SDLoc dl(N);
16037 SDValue Op(N, 0);
16038
16039 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16040 // from the hardware.
16041 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16042 return SDValue();
16043 if (!Op.getOperand(0).getValueType().isSimple())
16044 return SDValue();
16045 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16046 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16047 return SDValue();
16048
16049 SDValue FirstOperand(Op.getOperand(0));
16050 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16051 (FirstOperand.getValueType() == MVT::i8 ||
16052 FirstOperand.getValueType() == MVT::i16);
16053 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16054 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16055 bool DstDouble = Op.getValueType() == MVT::f64;
16056 unsigned ConvOp = Signed ?
16057 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16058 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16059 SDValue WidthConst =
16060 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16061 dl, false);
16062 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16063 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16064 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16065 DAG.getVTList(MVT::f64, MVT::Other),
16066 Ops, MVT::i8, LDN->getMemOperand());
16067 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16068
16069 // For signed conversion, we need to sign-extend the value in the VSR
16070 if (Signed) {
16071 SDValue ExtOps[] = { Ld, WidthConst };
16072 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16073 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16074 } else
16075 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16076 }
16077
16078
16079 // For i32 intermediate values, unfortunately, the conversion functions
16080 // leave the upper 32 bits of the value undefined. Within the set of
16081 // scalar instructions, we have no method for zero- or sign-extending the
16082 // value. Thus, we cannot handle i32 intermediate values here.
16083 if (Op.getOperand(0).getValueType() == MVT::i32)
16084 return SDValue();
16085
16086 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16087 "UINT_TO_FP is supported only with FPCVT");
16088
16089 // If we have FCFIDS, then use it when converting to single-precision.
16090 // Otherwise, convert to double-precision and then round.
16091 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16092 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16093 : PPCISD::FCFIDS)
16094 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16095 : PPCISD::FCFID);
16096 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16097 ? MVT::f32
16098 : MVT::f64;
16099
16100 // If we're converting from a float, to an int, and back to a float again,
16101 // then we don't need the store/load pair at all.
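// For example, (sint_to_fp (fp_to_sint f64 %x)) can be emitted as
// (FCFID (FCTIDZ %x)), keeping the value in floating-point registers.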
16102 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16103 Subtarget.hasFPCVT()) ||
16104 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16105 SDValue Src = Op.getOperand(0).getOperand(0);
16106 if (Src.getValueType() == MVT::f32) {
16107 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16108 DCI.AddToWorklist(Src.getNode());
16109 } else if (Src.getValueType() != MVT::f64) {
16110 // Make sure that we don't pick up a ppc_fp128 source value.
16111 return SDValue();
16112 }
16113
16114 unsigned FCTOp =
16115 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16116 PPCISD::FCTIDUZ;
16117
16118 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16119 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16120
16121 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16122 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16123 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16124 DCI.AddToWorklist(FP.getNode());
16125 }
16126
16127 return FP;
16128 }
16129
16130 return SDValue();
16131}
16132
16133// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16134// builtins) into loads with swaps.
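// Roughly: a little-endian (load v4i32 p) becomes an LXVD2X of p followed by
// an XXSWAPD, plus a bitcast back to v4i32 since the wide load is done as
// v2f64.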
16135 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16136 DAGCombinerInfo &DCI) const {
16137 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16138 // load combines.
16139 if (DCI.isBeforeLegalizeOps())
16140 return SDValue();
16141
16142 SelectionDAG &DAG = DCI.DAG;
16143 SDLoc dl(N);
16144 SDValue Chain;
16145 SDValue Base;
16146 MachineMemOperand *MMO;
16147
16148 switch (N->getOpcode()) {
16149 default:
16150 llvm_unreachable("Unexpected opcode for little endian VSX load");
16151 case ISD::LOAD: {
16152 LoadSDNode *LD = cast<LoadSDNode>(N);
16153 Chain = LD->getChain();
16154 Base = LD->getBasePtr();
16155 MMO = LD->getMemOperand();
16156 // If the MMO suggests this isn't a load of a full vector, leave
16157 // things alone. For a built-in, we have to make the change for
16158 // correctness, so if there is a size problem that will be a bug.
16159 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16160 return SDValue();
16161 break;
16162 }
16163 case ISD::INTRINSIC_W_CHAIN: {
16164 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16165 Chain = Intrin->getChain();
16166 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16167 // us what we want. Get operand 2 instead.
16168 Base = Intrin->getOperand(2);
16169 MMO = Intrin->getMemOperand();
16170 break;
16171 }
16172 }
16173
16174 MVT VecTy = N->getValueType(0).getSimpleVT();
16175
16176 SDValue LoadOps[] = { Chain, Base };
16177 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16178 DAG.getVTList(MVT::v2f64, MVT::Other),
16179 LoadOps, MVT::v2f64, MMO);
16180
16181 DCI.AddToWorklist(Load.getNode());
16182 Chain = Load.getValue(1);
16183 SDValue Swap = DAG.getNode(
16184 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16185 DCI.AddToWorklist(Swap.getNode());
16186
16187 // Add a bitcast if the resulting load type doesn't match v2f64.
16188 if (VecTy != MVT::v2f64) {
16189 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16190 DCI.AddToWorklist(N.getNode());
16191 // Package {bitcast value, swap's chain} to match Load's shape.
16192 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16193 N, Swap.getValue(1));
16194 }
16195
16196 return Swap;
16197}
16198
16199// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16200// builtins) into stores with swaps.
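// Roughly the mirror image of the load case: the source is bitcast to v2f64
// if needed, passed through XXSWAPD, and then stored with the swapping store
// node.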
16201 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16202 DAGCombinerInfo &DCI) const {
16203 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16204 // store combines.
16205 if (DCI.isBeforeLegalizeOps())
16206 return SDValue();
16207
16208 SelectionDAG &DAG = DCI.DAG;
16209 SDLoc dl(N);
16210 SDValue Chain;
16211 SDValue Base;
16212 unsigned SrcOpnd;
16213 MachineMemOperand *MMO;
16214
16215 switch (N->getOpcode()) {
16216 default:
16217 llvm_unreachable("Unexpected opcode for little endian VSX store");
16218 case ISD::STORE: {
16219 StoreSDNode *ST = cast<StoreSDNode>(N);
16220 Chain = ST->getChain();
16221 Base = ST->getBasePtr();
16222 MMO = ST->getMemOperand();
16223 SrcOpnd = 1;
16224 // If the MMO suggests this isn't a store of a full vector, leave
16225 // things alone. For a built-in, we have to make the change for
16226 // correctness, so if there is a size problem that will be a bug.
16227 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16228 return SDValue();
16229 break;
16230 }
16231 case ISD::INTRINSIC_VOID: {
16232 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16233 Chain = Intrin->getChain();
16234 // Intrin->getBasePtr() oddly does not get what we want.
16235 Base = Intrin->getOperand(3);
16236 MMO = Intrin->getMemOperand();
16237 SrcOpnd = 2;
16238 break;
16239 }
16240 }
16241
16242 SDValue Src = N->getOperand(SrcOpnd);
16243 MVT VecTy = Src.getValueType().getSimpleVT();
16244
16245 // All stores are done as v2f64 and possible bit cast.
16246 if (VecTy != MVT::v2f64) {
16247 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16248 DCI.AddToWorklist(Src.getNode());
16249 }
16250
16251 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16252 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16253 DCI.AddToWorklist(Swap.getNode());
16254 Chain = Swap.getValue(1);
16255 SDValue StoreOps[] = { Chain, Swap, Base };
16256 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16257 DAG.getVTList(MVT::Other),
16258 StoreOps, VecTy, MMO);
16259 DCI.AddToWorklist(Store.getNode());
16260 return Store;
16261}
16262
16263// Handle DAG combine for STORE (FP_TO_INT F).
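// For example, (store (fp_to_sint f64 %f), p) can be emitted as a direct
// store of the converted value from a VSR, avoiding a round trip through a
// GPR; the number of bytes to store is passed as an extra operand below.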
16264SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16265 DAGCombinerInfo &DCI) const {
16266 SelectionDAG &DAG = DCI.DAG;
16267 SDLoc dl(N);
16268 unsigned Opcode = N->getOperand(1).getOpcode();
16269 (void)Opcode;
16270 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16271
16272 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16273 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16274 && "Not a FP_TO_INT Instruction!");
16275
16276 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16277 EVT Op1VT = N->getOperand(1).getValueType();
16278 EVT ResVT = Val.getValueType();
16279
16280 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16281 return SDValue();
16282
16283 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16284 bool ValidTypeForStoreFltAsInt =
16285 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16286 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16287
16288 // TODO: Lower conversion from f128 on all VSX targets
16289 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16290 return SDValue();
16291
16292 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16293 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16294 return SDValue();
16295
16296 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16297
16298 // Set number of bytes being converted.
16299 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16300 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16301 DAG.getIntPtrConstant(ByteSize, dl, false),
16302 DAG.getValueType(Op1VT)};
16303
16304 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16305 DAG.getVTList(MVT::Other), Ops,
16306 cast<StoreSDNode>(N)->getMemoryVT(),
16307 cast<StoreSDNode>(N)->getMemOperand());
16308
16309 return Val;
16310}
16311
16312static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16313 // Check that the source of the element keeps flipping
16314 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
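// e.g. <0,16,1,17,2,18,3,19> alternates sources, while <0,1,16,17> does not.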
16315 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16316 for (int i = 1, e = Mask.size(); i < e; i++) {
16317 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16318 return false;
16319 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16320 return false;
16321 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16322 }
16323 return true;
16324}
16325
16326static bool isSplatBV(SDValue Op) {
16327 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16328 return false;
16329 SDValue FirstOp;
16330
16331 // Find first non-undef input.
16332 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16333 FirstOp = Op.getOperand(i);
16334 if (!FirstOp.isUndef())
16335 break;
16336 }
16337
16338 // All inputs are undef or the same as the first non-undef input.
16339 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16340 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16341 return false;
16342 return true;
16343}
16344
16345 static SDValue isScalarToVec(SDValue Op) {
16346 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16347 return Op;
16348 if (Op.getOpcode() != ISD::BITCAST)
16349 return SDValue();
16350 Op = Op.getOperand(0);
16351 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16352 return Op;
16353 return SDValue();
16354}
16355
16356// Fix up the shuffle mask to account for the fact that the result of
16357// scalar_to_vector is not in lane zero. This just takes all values in
16358// the ranges specified by the min/max indices and adds the number of
16359// elements required to ensure each element comes from the respective
16360// position in the valid lane.
16361// On little endian, that's just the corresponding element in the other
16362// half of the vector. On big endian, it is in the same half but right
16363// justified rather than left justified in that half.
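// For illustration, with a v4i32 shuffle (HalfVec == 2) on little endian, an
// LHS mask index of 0 becomes 2 because the scalar lives in the other half
// of the permuted vector.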
16364 static void fixupShuffleMaskForPermutedSToV(
16365 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16366 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16367 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16368 int LHSEltFixup =
16369 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16370 int RHSEltFixup =
16371 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16372 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16373 int Idx = ShuffV[I];
16374 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16375 ShuffV[I] += LHSEltFixup;
16376 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16377 ShuffV[I] += RHSEltFixup;
16378 }
16379}
16380
16381// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16382// the original is:
16383// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16384// In such a case, just change the shuffle mask to extract the element
16385// from the permuted index.
16387 const PPCSubtarget &Subtarget) {
16388 SDLoc dl(OrigSToV);
16389 EVT VT = OrigSToV.getValueType();
16390 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16391 "Expecting a SCALAR_TO_VECTOR here");
16392 SDValue Input = OrigSToV.getOperand(0);
16393
16394 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16395 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16396 SDValue OrigVector = Input.getOperand(0);
16397
16398 // Can't handle non-const element indices or different vector types
16399 // for the input to the extract and the output of the scalar_to_vector.
16400 if (Idx && VT == OrigVector.getValueType()) {
16401 unsigned NumElts = VT.getVectorNumElements();
16402 assert(
16403 NumElts > 1 &&
16404 "Cannot produce a permuted scalar_to_vector for one element vector");
16405 SmallVector<int, 16> NewMask(NumElts, -1);
16406 unsigned ResultInElt = NumElts / 2;
16407 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16408 NewMask[ResultInElt] = Idx->getZExtValue();
16409 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16410 }
16411 }
16412 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16413 OrigSToV.getOperand(0));
16414}
16415
16416 static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16417 int HalfVec, int LHSLastElementDefined,
16418 int RHSLastElementDefined) {
16419 for (int Index : ShuffV) {
16420 if (Index < 0) // Skip explicitly undefined mask indices.
16421 continue;
16422 // Handle first input vector of the vector_shuffle.
16423 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16424 (Index > LHSLastElementDefined))
16425 return false;
16426 // Handle second input vector of the vector_shuffle.
16427 if ((RHSLastElementDefined >= 0) &&
16428 (Index > HalfVec + RHSLastElementDefined))
16429 return false;
16430 }
16431 return true;
16432}
16433
16434 static SDValue generateSToVPermutedForVecShuffle(
16435 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16436 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16437 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16438 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16439 // Set up the values for the shuffle vector fixup.
16440 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16441 // The last element depends on if the input comes from the LHS or RHS.
16442 //
16443 // For example:
16444 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16445 //
16446 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16447 // because elements 1 and higher of a scalar_to_vector are undefined.
16448 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16449 // because elements 1 and higher of a scalar_to_vector are undefined.
16450 // It is also not 4 because the original scalar_to_vector is wider and
16451 // actually contains two i32 elements.
16452 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16453 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16454 : FirstElt;
16455 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16456 if (SToVPermuted.getValueType() != VecShuffOperandType)
16457 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16458 return SToVPermuted;
16459}
16460
16461// On little endian subtargets, combine shuffles such as:
16462// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16463// into:
16464// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16465// because the latter can be matched to a single instruction merge.
16466// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16467// to put the value into element zero. Adjust the shuffle mask so that the
16468// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16469// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16470// nodes with elements smaller than doubleword because all the ways
16471// of getting scalar data into a vector register put the value in the
16472// rightmost element of the left half of the vector.
16473SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16474 SelectionDAG &DAG) const {
16475 SDValue LHS = SVN->getOperand(0);
16476 SDValue RHS = SVN->getOperand(1);
16477 auto Mask = SVN->getMask();
16478 int NumElts = LHS.getValueType().getVectorNumElements();
16479 SDValue Res(SVN, 0);
16480 SDLoc dl(SVN);
16481 bool IsLittleEndian = Subtarget.isLittleEndian();
16482
16483 // On big endian targets this is only useful for subtargets with direct moves.
16484 // On little endian targets it would be useful for all subtargets with VSX.
16485 // However adding special handling for LE subtargets without direct moves
16486 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16487 // which includes direct moves.
16488 if (!Subtarget.hasDirectMove())
16489 return Res;
16490
16491 // If this is not a shuffle of a shuffle and the first element comes from
16492 // the second vector, canonicalize to the commuted form. This will make it
16493 // more likely to match one of the single instruction patterns.
16494 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16495 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16496 std::swap(LHS, RHS);
16497 Res = DAG.getCommutedVectorShuffle(*SVN);
16498 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16499 }
16500
16501 // Adjust the shuffle mask if either input vector comes from a
16502 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16503 // form (to prevent the need for a swap).
16504 SmallVector<int, 16> ShuffV(Mask);
16505 SDValue SToVLHS = isScalarToVec(LHS);
16506 SDValue SToVRHS = isScalarToVec(RHS);
16507 if (SToVLHS || SToVRHS) {
16508 EVT VT = SVN->getValueType(0);
16509 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16510 int ShuffleNumElts = ShuffV.size();
16511 int HalfVec = ShuffleNumElts / 2;
16512 // The width of the "valid lane" (i.e. the lane that contains the value that
16513 // is vectorized) needs to be expressed in terms of the number of elements
16514 // of the shuffle. It is thereby the ratio of the values before and after
16515 // any bitcast, which will be set later on if the LHS or RHS are
16516 // SCALAR_TO_VECTOR nodes.
16517 unsigned LHSNumValidElts = HalfVec;
16518 unsigned RHSNumValidElts = HalfVec;
16519
16520 // Initially assume that neither input is permuted. These will be adjusted
16521 // accordingly if either input is. Note, that -1 means that all elements
16522 // are undefined.
16523 int LHSFirstElt = 0;
16524 int RHSFirstElt = ShuffleNumElts;
16525 int LHSLastElt = -1;
16526 int RHSLastElt = -1;
16527
16528 // Get the permuted scalar to vector nodes for the source(s) that come from
16529 // ISD::SCALAR_TO_VECTOR.
16530 // On big endian systems, this only makes sense for element sizes smaller
16531 // than 64 bits since for 64-bit elements, all instructions already put
16532 // the value into element zero. Since scalar size of LHS and RHS may differ
16533 // after isScalarToVec, this should be checked using their own sizes.
16534 int LHSScalarSize = 0;
16535 int RHSScalarSize = 0;
16536 if (SToVLHS) {
16537 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16538 if (!IsLittleEndian && LHSScalarSize >= 64)
16539 return Res;
16540 }
16541 if (SToVRHS) {
16542 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16543 if (!IsLittleEndian && RHSScalarSize >= 64)
16544 return Res;
16545 }
16546 if (LHSScalarSize != 0)
16547 LHS = generateSToVPermutedForVecShuffle(
16548 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
16549 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
16550 if (RHSScalarSize != 0)
16551 RHS = generateSToVPermutedForVecShuffle(
16552 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
16553 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
16554
16555 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
16556 return Res;
16557
16558 // Fix up the shuffle mask to reflect where the desired element actually is.
16559 // The minimum and maximum indices that correspond to element zero for both
16560 // the LHS and RHS are computed and will control which shuffle mask entries
16561 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16562 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16563 fixupShuffleMaskForPermutedSToV(
16564 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16565 LHSNumValidElts, RHSNumValidElts, Subtarget);
16566 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16567
16568 // We may have simplified away the shuffle. We won't be able to do anything
16569 // further with it here.
16570 if (!isa<ShuffleVectorSDNode>(Res))
16571 return Res;
16572 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16573 }
16574
16575 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16576 // The common case after we commuted the shuffle is that the RHS is a splat
16577 // and we have elements coming in from the splat at indices that are not
16578 // conducive to using a merge.
16579 // Example:
16580 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16581 if (!isSplatBV(TheSplat))
16582 return Res;
16583
16584 // We are looking for a mask such that all even elements are from
16585 // one vector and all odd elements from the other.
16586 if (!isAlternatingShuffMask(Mask, NumElts))
16587 return Res;
16588
16589 // Adjust the mask so we are pulling in the same index from the splat
16590 // as the index from the interesting vector in consecutive elements.
16591 if (IsLittleEndian) {
16592 // Example (even elements from first vector):
16593 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16594 if (Mask[0] < NumElts)
16595 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16596 if (ShuffV[i] < 0)
16597 continue;
16598 // If element from non-splat is undef, pick first element from splat.
16599 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16600 }
16601 // Example (odd elements from first vector):
16602 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16603 else
16604 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16605 if (ShuffV[i] < 0)
16606 continue;
16607 // If element from non-splat is undef, pick first element from splat.
16608 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16609 }
16610 } else {
16611 // Example (even elements from first vector):
16612 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16613 if (Mask[0] < NumElts)
16614 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16615 if (ShuffV[i] < 0)
16616 continue;
16617 // If element from non-splat is undef, pick first element from splat.
16618 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16619 }
16620 // Example (odd elements from first vector):
16621 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16622 else
16623 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16624 if (ShuffV[i] < 0)
16625 continue;
16626 // If element from non-splat is undef, pick first element from splat.
16627 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16628 }
16629 }
16630
16631 // If the RHS has undefs, we need to remove them since we may have created
16632 // a shuffle that adds those instead of the splat value.
16633 SDValue SplatVal =
16634 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16635 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16636
16637 if (IsLittleEndian)
16638 RHS = TheSplat;
16639 else
16640 LHS = TheSplat;
16641 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16642}
16643
16644SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16645 LSBaseSDNode *LSBase,
16646 DAGCombinerInfo &DCI) const {
16647 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16648 "Not a reverse memop pattern!");
16649
16650 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16651 auto Mask = SVN->getMask();
16652 int i = 0;
16653 auto I = Mask.rbegin();
16654 auto E = Mask.rend();
16655
16656 for (; I != E; ++I) {
16657 if (*I != i)
16658 return false;
16659 i++;
16660 }
16661 return true;
16662 };
16663
16664 SelectionDAG &DAG = DCI.DAG;
16665 EVT VT = SVN->getValueType(0);
16666
16667 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16668 return SDValue();
16669
16670 // Before P9, the PPCVSXSwapRemoval pass rewrites the element order.
16671 // See the comment in PPCVSXSwapRemoval.cpp.
16672 // This combine conflicts with that optimization, so we don't do it here.
16673 if (!Subtarget.hasP9Vector())
16674 return SDValue();
16675
16676 if (!IsElementReverse(SVN))
16677 return SDValue();
16678
16679 if (LSBase->getOpcode() == ISD::LOAD) {
16680 // If result 0 of the load has any user other than the
16681 // shufflevector instruction, it is not profitable to replace the
16682 // shufflevector with a reverse load.
16683 for (SDUse &Use : LSBase->uses())
16684 if (Use.getResNo() == 0 &&
16685 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16686 return SDValue();
16687
16688 SDLoc dl(LSBase);
16689 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16690 return DAG.getMemIntrinsicNode(
16691 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16692 LSBase->getMemoryVT(), LSBase->getMemOperand());
16693 }
16694
16695 if (LSBase->getOpcode() == ISD::STORE) {
16696 // If there are other uses of the shuffle, the swap cannot be avoided.
16697 // Forcing the use of an X-Form (since swapped stores only have
16698 // X-Forms) without removing the swap is unprofitable.
16699 if (!SVN->hasOneUse())
16700 return SDValue();
16701
16702 SDLoc dl(LSBase);
16703 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16704 LSBase->getBasePtr()};
16705 return DAG.getMemIntrinsicNode(
16706 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16707 LSBase->getMemoryVT(), LSBase->getMemOperand());
16708 }
16709
16710 llvm_unreachable("Expected a load or store node here");
16711}
16712
16713static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16714 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16715 if (IntrinsicID == Intrinsic::ppc_stdcx)
16716 StoreWidth = 8;
16717 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16718 StoreWidth = 4;
16719 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16720 StoreWidth = 2;
16721 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16722 StoreWidth = 1;
16723 else
16724 return false;
16725 return true;
16726}
16727
16728 static SDValue DAGCombineAddc(SDNode *N,
16729 PPCTargetLowering::DAGCombinerInfo &DCI) {
16730 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
16731 // (ADDC (ADDE 0, 0, C), -1) -> C
16732 SDValue LHS = N->getOperand(0);
16733 SDValue RHS = N->getOperand(1);
16734 if (LHS->getOpcode() == PPCISD::ADDE &&
16735 isNullConstant(LHS->getOperand(0)) &&
16736 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
16737 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
16738 }
16739 }
16740 return SDValue();
16741}
16742
16743 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16744 DAGCombinerInfo &DCI) const {
16745 SelectionDAG &DAG = DCI.DAG;
16746 SDLoc dl(N);
16747 switch (N->getOpcode()) {
16748 default: break;
16749 case ISD::ADD:
16750 return combineADD(N, DCI);
16751 case ISD::AND: {
16752 // We don't want (and (zext (shift...)), C) if C fits in the width of the
16753 // original input as that will prevent us from selecting optimal rotates.
16754 // This only matters if the input to the extend is i32 widened to i64.
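// For example, (and (zext i64 (srl i32 %x, 4)), 255) is rewritten below as
// (zext (and i32 (srl %x, 4), 255)) so the shift and mask can be selected as
// a single 32-bit rotate-and-mask instruction.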
16755 SDValue Op1 = N->getOperand(0);
16756 SDValue Op2 = N->getOperand(1);
16757 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16758 Op1.getOpcode() != ISD::ANY_EXTEND) ||
16759 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
16760 Op1.getOperand(0).getValueType() != MVT::i32)
16761 break;
16762 SDValue NarrowOp = Op1.getOperand(0);
16763 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16764 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16765 break;
16766
16767 uint64_t Imm = Op2->getAsZExtVal();
16768 // Make sure that the constant is narrow enough to fit in the narrow type.
16769 if (!isUInt<32>(Imm))
16770 break;
16771 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
16772 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
16773 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
16774 }
16775 case ISD::SHL:
16776 return combineSHL(N, DCI);
16777 case ISD::SRA:
16778 return combineSRA(N, DCI);
16779 case ISD::SRL:
16780 return combineSRL(N, DCI);
16781 case ISD::MUL:
16782 return combineMUL(N, DCI);
16783 case ISD::FMA:
16784 case PPCISD::FNMSUB:
16785 return combineFMALike(N, DCI);
16786 case PPCISD::SHL:
16787 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
16788 return N->getOperand(0);
16789 break;
16790 case PPCISD::SRL:
16791 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
16792 return N->getOperand(0);
16793 break;
16794 case PPCISD::SRA:
16795 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
16796 if (C->isZero() || // 0 >>s V -> 0.
16797 C->isAllOnes()) // -1 >>s V -> -1.
16798 return N->getOperand(0);
16799 }
16800 break;
16801 case ISD::SIGN_EXTEND:
16802 case ISD::ZERO_EXTEND:
16803 case ISD::ANY_EXTEND:
16804 return DAGCombineExtBoolTrunc(N, DCI);
16805 case ISD::TRUNCATE:
16806 return combineTRUNCATE(N, DCI);
16807 case ISD::SETCC:
16808 if (SDValue CSCC = combineSetCC(N, DCI))
16809 return CSCC;
16810 [[fallthrough]];
16811 case ISD::SELECT_CC:
16812 return DAGCombineTruncBoolExt(N, DCI);
16813 case ISD::SINT_TO_FP:
16814 case ISD::UINT_TO_FP:
16815 return combineFPToIntToFP(N, DCI);
16816 case ISD::VECTOR_SHUFFLE:
16817 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
16818 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
16819 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
16820 }
16821 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
16822 case ISD::STORE: {
16823
16824 EVT Op1VT = N->getOperand(1).getValueType();
16825 unsigned Opcode = N->getOperand(1).getOpcode();
16826
16827 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16828 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16829 SDValue Val = combineStoreFPToInt(N, DCI);
16830 if (Val)
16831 return Val;
16832 }
16833
16834 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16835 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
16836 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
16837 if (Val)
16838 return Val;
16839 }
16840
16841 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
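// e.g. (store (bswap i32 %x), p) becomes an STBRX memory intrinsic that
// stores %x byte-reversed, removing the need for a separate byte swap.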
16842 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
16843 N->getOperand(1).getNode()->hasOneUse() &&
16844 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16845 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16846
16847 // STBRX can only handle simple types, and it makes no sense to store
16848 // fewer than two bytes in byte-reversed order.
16849 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
16850 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16851 break;
16852
16853 SDValue BSwapOp = N->getOperand(1).getOperand(0);
16854 // Do an any-extend to 32-bits if this is a half-word input.
16855 if (BSwapOp.getValueType() == MVT::i16)
16856 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
16857
16858 // If the type of the BSWAP operand is wider than the stored memory width,
16859 // it needs to be shifted right before the STBRX.
16860 if (Op1VT.bitsGT(mVT)) {
16861 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16862 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
16863 DAG.getConstant(Shift, dl, MVT::i32));
16864 // Need to truncate if this is a bswap of i64 stored as i32/i16.
16865 if (Op1VT == MVT::i64)
16866 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
16867 }
16868
16869 SDValue Ops[] = {
16870 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
16871 };
16872 return
16873 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
16874 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
16875 cast<StoreSDNode>(N)->getMemOperand());
16876 }
16877
16878 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
16879 // This increases the chance of CSEing the constant materialization.
16880 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16881 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
16882 // Need to sign-extend to 64 bits to handle negative values.
16883 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
16884 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
16885 MemVT.getSizeInBits());
16886 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
16887
16888 auto *ST = cast<StoreSDNode>(N);
16889 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
16890 ST->getBasePtr(), ST->getOffset(), MemVT,
16891 ST->getMemOperand(), ST->getAddressingMode(),
16892 /*IsTruncating=*/true);
16893 // Note we use CombineTo here to prevent DAGCombiner from visiting the
16894 // new store which will change the constant by removing non-demanded bits.
16895 return ST->isUnindexed()
16896 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
16897 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
16898 }
16899
16900 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16901 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16902 if (Op1VT.isSimple()) {
16903 MVT StoreVT = Op1VT.getSimpleVT();
16904 if (Subtarget.needsSwapsForVSXMemOps() &&
16905 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16906 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16907 return expandVSXStoreForLE(N, DCI);
16908 }
16909 break;
16910 }
16911 case ISD::LOAD: {
16912 LoadSDNode *LD = cast<LoadSDNode>(N);
16913 EVT VT = LD->getValueType(0);
16914
16915 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16916 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16917 if (VT.isSimple()) {
16918 MVT LoadVT = VT.getSimpleVT();
16919 if (Subtarget.needsSwapsForVSXMemOps() &&
16920 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16921 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16922 return expandVSXLoadForLE(N, DCI);
16923 }
16924
16925 // We sometimes end up with a 64-bit integer load, from which we extract
16926 // two single-precision floating-point numbers. This happens with
16927 // std::complex<float>, and other similar structures, because of the way we
16928 // canonicalize structure copies. However, if we lack direct moves,
16929 // then the final bitcasts from the extracted integer values to the
16930 // floating-point numbers turn into store/load pairs. Even with direct moves,
16931 // just loading the two floating-point numbers is likely better.
16932 auto ReplaceTwoFloatLoad = [&]() {
16933 if (VT != MVT::i64)
16934 return false;
16935
16936 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16937 LD->isVolatile())
16938 return false;
16939
16940 // We're looking for a sequence like this:
16941 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16942 // t16: i64 = srl t13, Constant:i32<32>
16943 // t17: i32 = truncate t16
16944 // t18: f32 = bitcast t17
16945 // t19: i32 = truncate t13
16946 // t20: f32 = bitcast t19
16947
16948 if (!LD->hasNUsesOfValue(2, 0))
16949 return false;
16950
16951 auto UI = LD->user_begin();
16952 while (UI.getUse().getResNo() != 0) ++UI;
16953 SDNode *Trunc = *UI++;
16954 while (UI.getUse().getResNo() != 0) ++UI;
16955 SDNode *RightShift = *UI;
16956 if (Trunc->getOpcode() != ISD::TRUNCATE)
16957 std::swap(Trunc, RightShift);
16958
16959 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16960 Trunc->getValueType(0) != MVT::i32 ||
16961 !Trunc->hasOneUse())
16962 return false;
16963 if (RightShift->getOpcode() != ISD::SRL ||
16964 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16965 RightShift->getConstantOperandVal(1) != 32 ||
16966 !RightShift->hasOneUse())
16967 return false;
16968
16969 SDNode *Trunc2 = *RightShift->user_begin();
16970 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16971 Trunc2->getValueType(0) != MVT::i32 ||
16972 !Trunc2->hasOneUse())
16973 return false;
16974
16975 SDNode *Bitcast = *Trunc->user_begin();
16976 SDNode *Bitcast2 = *Trunc2->user_begin();
16977
16978 if (Bitcast->getOpcode() != ISD::BITCAST ||
16979 Bitcast->getValueType(0) != MVT::f32)
16980 return false;
16981 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16982 Bitcast2->getValueType(0) != MVT::f32)
16983 return false;
16984
16985 if (Subtarget.isLittleEndian())
16986 std::swap(Bitcast, Bitcast2);
16987
16988 // Bitcast has the second float (in memory-layout order) and Bitcast2
16989 // has the first one.
16990
16991 SDValue BasePtr = LD->getBasePtr();
16992 if (LD->isIndexed()) {
16993 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16994 "Non-pre-inc AM on PPC?");
16995 BasePtr =
16996 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16997 LD->getOffset());
16998 }
16999
17000 auto MMOFlags =
17001 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17002 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17003 LD->getPointerInfo(), LD->getAlign(),
17004 MMOFlags, LD->getAAInfo());
17005 SDValue AddPtr =
17006 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17007 BasePtr, DAG.getIntPtrConstant(4, dl));
17008 SDValue FloatLoad2 = DAG.getLoad(
17009 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17010 LD->getPointerInfo().getWithOffset(4),
17011 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17012
17013 if (LD->isIndexed()) {
17014 // Note that DAGCombine should re-form any pre-increment load(s) from
17015 // what is produced here if that makes sense.
17016 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17017 }
17018
17019 DCI.CombineTo(Bitcast2, FloatLoad);
17020 DCI.CombineTo(Bitcast, FloatLoad2);
17021
17022 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17023 SDValue(FloatLoad2.getNode(), 1));
17024 return true;
17025 };
17026
17027 if (ReplaceTwoFloatLoad())
17028 return SDValue(N, 0);
17029
17030 EVT MemVT = LD->getMemoryVT();
17031 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17032 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17033 if (LD->isUnindexed() && VT.isVector() &&
17034 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17035 // P8 and later hardware should just use LOAD.
17036 !Subtarget.hasP8Vector() &&
17037 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17038 VT == MVT::v4f32))) &&
17039 LD->getAlign() < ABIAlignment) {
17040 // This is a type-legal unaligned Altivec load.
17041 SDValue Chain = LD->getChain();
17042 SDValue Ptr = LD->getBasePtr();
17043 bool isLittleEndian = Subtarget.isLittleEndian();
17044
17045 // This implements the loading of unaligned vectors as described in
17046 // the venerable Apple Velocity Engine overview. Specifically:
17047 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17048 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17049 //
17050 // The general idea is to expand a sequence of one or more unaligned
17051 // loads into an alignment-based permutation-control instruction (lvsl
17052 // or lvsr), a series of regular vector loads (which always truncate
17053 // their input address to an aligned address), and a series of
17054 // permutations. The results of these permutations are the requested
17055 // loaded values. The trick is that the last "extra" load is not taken
17056 // from the address you might suspect (sizeof(vector) bytes after the
17057 // last requested load), but rather sizeof(vector) - 1 bytes after the
17058 // last requested vector. The point of this is to avoid a page fault if
17059 // the base address happened to be aligned. This works because if the
17060 // base address is aligned, then adding less than a full vector length
17061 // will cause the last vector in the sequence to be (re)loaded.
17062 // Otherwise, the next vector will be fetched as you might suspect was
17063 // necessary.
17064
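 // Rough sketch of the expansion for a single unaligned 16-byte load (the
 // exact nodes are built below via BuildIntrinsicOp/getMemIntrinsicNode):
 //   PermCntl  = lvsl(addr)             ; lvsr on little endian
 //   BaseLoad  = lvx(addr)              ; address truncated to 16-byte alignment
 //   ExtraLoad = lvx(addr + 15)         ; sizeof(vector)-1 past the base load
 //   Result    = vperm(BaseLoad, ExtraLoad, PermCntl)  ; operands swapped on LE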
17065 // We might be able to reuse the permutation generation from
17066 // a different base address offset from this one by an aligned amount.
17067 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17068 // optimization later.
17069 Intrinsic::ID Intr, IntrLD, IntrPerm;
17070 MVT PermCntlTy, PermTy, LDTy;
17071 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17072 : Intrinsic::ppc_altivec_lvsl;
17073 IntrLD = Intrinsic::ppc_altivec_lvx;
17074 IntrPerm = Intrinsic::ppc_altivec_vperm;
17075 PermCntlTy = MVT::v16i8;
17076 PermTy = MVT::v4i32;
17077 LDTy = MVT::v4i32;
17078
17079 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17080
17081 // Create the new MMO for the new base load. It is like the original MMO,
17082 // but represents an area in memory almost twice the vector size centered
17083 // on the original address. If the address is unaligned, we might start
17084 // reading up to (sizeof(vector)-1) bytes below the address of the
17085 // original unaligned load.
17086 MachineFunction &MF = DAG.getMachineFunction();
17087 MachineMemOperand *BaseMMO =
17088 MF.getMachineMemOperand(LD->getMemOperand(),
17089 -(int64_t)MemVT.getStoreSize()+1,
17090 2*MemVT.getStoreSize()-1);
17091
17092 // Create the new base load.
17093 SDValue LDXIntID =
17094 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17095 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17096 SDValue BaseLoad =
17097 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17098 DAG.getVTList(PermTy, MVT::Other),
17099 BaseLoadOps, LDTy, BaseMMO);
17100
17101 // Note that the value of IncOffset (which is provided to the next
17102 // load's pointer info offset value, and thus used to calculate the
17103 // alignment), and the value of IncValue (which is actually used to
17104 // increment the pointer value) are different! This is because we
17105 // require the next load to appear to be aligned, even though it
17106 // is actually offset from the base pointer by a lesser amount.
17107 int IncOffset = VT.getSizeInBits() / 8;
17108 int IncValue = IncOffset;
17109
17110 // Walk (both up and down) the chain looking for another load at the real
17111 // (aligned) offset (the alignment of the other load does not matter in
17112 // this case). If found, then do not use the offset reduction trick, as
17113 // that will prevent the loads from being later combined (as they would
17114 // otherwise be duplicates).
17115 if (!findConsecutiveLoad(LD, DAG))
17116 --IncValue;
17117
17118 SDValue Increment =
17119 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17120 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17121
17122 MachineMemOperand *ExtraMMO =
17123 MF.getMachineMemOperand(LD->getMemOperand(),
17124 1, 2*MemVT.getStoreSize()-1);
17125 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17126 SDValue ExtraLoad =
17127 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17128 DAG.getVTList(PermTy, MVT::Other),
17129 ExtraLoadOps, LDTy, ExtraMMO);
17130
17131 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17132 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17133
17134 // Because vperm has a big-endian bias, we must reverse the order
17135 // of the input vectors and complement the permute control vector
17136 // when generating little endian code. We have already handled the
17137 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17138 // and ExtraLoad here.
17139 SDValue Perm;
17140 if (isLittleEndian)
17141 Perm = BuildIntrinsicOp(IntrPerm,
17142 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17143 else
17144 Perm = BuildIntrinsicOp(IntrPerm,
17145 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17146
17147 if (VT != PermTy)
17148 Perm = Subtarget.hasAltivec()
17149 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17150 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17151 DAG.getTargetConstant(1, dl, MVT::i64));
17152 // second argument is 1 because this rounding
17153 // is always exact.
17154
17155 // The output of the permutation is our loaded result, the TokenFactor is
17156 // our new chain.
17157 DCI.CombineTo(N, Perm, TF);
17158 return SDValue(N, 0);
17159 }
17160 }
17161 break;
17162 case ISD::INTRINSIC_WO_CHAIN: {
17163 bool isLittleEndian = Subtarget.isLittleEndian();
17164 unsigned IID = N->getConstantOperandVal(0);
17165 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17166 : Intrinsic::ppc_altivec_lvsl);
17167 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17168 SDValue Add = N->getOperand(1);
17169
17170 int Bits = 4 /* 16 byte alignment */;
17171
17172 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17173 APInt::getAllOnes(Bits /* alignment */)
17174 .zext(Add.getScalarValueSizeInBits()))) {
17175 SDNode *BasePtr = Add->getOperand(0).getNode();
17176 for (SDNode *U : BasePtr->users()) {
17177 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17178 U->getConstantOperandVal(0) == IID) {
17179 // We've found another LVSL/LVSR, and this address is an aligned
17180 // multiple of that one. The results will be the same, so use the
17181 // one we've just found instead.
17182
17183 return SDValue(U, 0);
17184 }
17185 }
17186 }
17187
17188 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17189 SDNode *BasePtr = Add->getOperand(0).getNode();
17190 for (SDNode *U : BasePtr->users()) {
17191 if (U->getOpcode() == ISD::ADD &&
17192 isa<ConstantSDNode>(U->getOperand(1)) &&
17193 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17194 (1ULL << Bits) ==
17195 0) {
17196 SDNode *OtherAdd = U;
17197 for (SDNode *V : OtherAdd->users()) {
17198 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17199 V->getConstantOperandVal(0) == IID) {
17200 return SDValue(V, 0);
17201 }
17202 }
17203 }
17204 }
17205 }
17206 }
17207
17208 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17209 // Expose the vabsduw/h/b opportunity for downstream combines
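 // For example, with a = <1, -2, 3, -4>: 0 - a = <-1, 2, -3, 4>, and
 // vmaxsw(0 - a, a) = <1, 2, 3, 4> = abs(a), so the intrinsic can be replaced
 // by ISD::ABS; a later combine can then form vabsduw for abs(x - y) patterns.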
17210 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17211 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17212 IID == Intrinsic::ppc_altivec_vmaxsh ||
17213 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17214 SDValue V1 = N->getOperand(1);
17215 SDValue V2 = N->getOperand(2);
17216 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17217 V1.getSimpleValueType() == MVT::v8i16 ||
17218 V1.getSimpleValueType() == MVT::v16i8) &&
17219 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17220 // (0-a, a)
17221 if (V1.getOpcode() == ISD::SUB &&
17222 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
17223 V1.getOperand(1) == V2) {
17224 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17225 }
17226 // (a, 0-a)
17227 if (V2.getOpcode() == ISD::SUB &&
17228 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
17229 V2.getOperand(1) == V1) {
17230 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17231 }
17232 // (x-y, y-x)
17233 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17234 V1.getOperand(0) == V2.getOperand(1) &&
17235 V1.getOperand(1) == V2.getOperand(0)) {
17236 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17237 }
17238 }
17239 }
17240 }
17241
17242 break;
17243 case ISD::INTRINSIC_W_CHAIN:
17244 switch (N->getConstantOperandVal(1)) {
17245 default:
17246 break;
17247 case Intrinsic::ppc_altivec_vsum4sbs:
17248 case Intrinsic::ppc_altivec_vsum4shs:
17249 case Intrinsic::ppc_altivec_vsum4ubs: {
17250 // These sum-across intrinsics only have a chain due to the side effect
17251 // that they may set the SAT bit. If we know the SAT bit will not be set
17252 // for some inputs, we can replace any uses of their chain with the
17253 // input chain.
17254 if (BuildVectorSDNode *BVN =
17255 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17256 APInt APSplatBits, APSplatUndef;
17257 unsigned SplatBitSize;
17258 bool HasAnyUndefs;
17259 bool BVNIsConstantSplat = BVN->isConstantSplat(
17260 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17261 !Subtarget.isLittleEndian());
17262 // If the constant splat vector is 0, the SAT bit will not be set.
17263 if (BVNIsConstantSplat && APSplatBits == 0)
17264 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17265 }
17266 return SDValue();
17267 }
17268 case Intrinsic::ppc_vsx_lxvw4x:
17269 case Intrinsic::ppc_vsx_lxvd2x:
17270 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17271 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17272 if (Subtarget.needsSwapsForVSXMemOps())
17273 return expandVSXLoadForLE(N, DCI);
17274 break;
17275 }
17276 break;
17277 case ISD::INTRINSIC_VOID:
17278 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17279 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17280 if (Subtarget.needsSwapsForVSXMemOps()) {
17281 switch (N->getConstantOperandVal(1)) {
17282 default:
17283 break;
17284 case Intrinsic::ppc_vsx_stxvw4x:
17285 case Intrinsic::ppc_vsx_stxvd2x:
17286 return expandVSXStoreForLE(N, DCI);
17287 }
17288 }
17289 break;
17290 case ISD::BSWAP: {
17291 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17292 // For subtargets without LDBRX, we can still do better than the default
17293 // expansion even for 64-bit BSWAP (LOAD).
17294 bool Is64BitBswapOn64BitTgt =
17295 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17296 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17297 N->getOperand(0).hasOneUse();
17298 if (IsSingleUseNormalLd &&
17299 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17300 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17301 SDValue Load = N->getOperand(0);
17302 LoadSDNode *LD = cast<LoadSDNode>(Load);
17303 // Create the byte-swapping load.
17304 SDValue Ops[] = {
17305 LD->getChain(), // Chain
17306 LD->getBasePtr(), // Ptr
17307 DAG.getValueType(N->getValueType(0)) // VT
17308 };
17309 SDValue BSLoad =
17310 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
17311 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
17312 MVT::i64 : MVT::i32, MVT::Other),
17313 Ops, LD->getMemoryVT(), LD->getMemOperand());
17314
17315 // If this is an i16 load, insert the truncate.
17316 SDValue ResVal = BSLoad;
17317 if (N->getValueType(0) == MVT::i16)
17318 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
17319
17320 // First, combine the bswap away. This makes the value produced by the
17321 // load dead.
17322 DCI.CombineTo(N, ResVal);
17323
17324 // Next, combine the load away; we give it a bogus result value but a real
17325 // chain result. The result value is dead because the bswap is dead.
17326 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
17327
17328 // Return N so it doesn't get rechecked!
17329 return SDValue(N, 0);
17330 }
17331 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17332 // before legalization so that the BUILD_PAIR is handled correctly.
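 // Sketch of the split performed below (not the literal output):
 //   Lo = BSWAP(load i32 [ptr]); Hi = BSWAP(load i32 [ptr + 4]);
 //   Res = BUILD_PAIR(Lo, Hi)        // (Hi, Lo) on little endian
 // Each i32 BSWAP(LOAD) is then matched to lwbrx, which is still cheaper than
 // the generic shift-and-mask expansion of a 64-bit byte swap.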
17333 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17334 !IsSingleUseNormalLd)
17335 return SDValue();
17336 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
17337
17338 // Can't split volatile or atomic loads.
17339 if (!LD->isSimple())
17340 return SDValue();
17341 SDValue BasePtr = LD->getBasePtr();
17342 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
17343 LD->getPointerInfo(), LD->getAlign());
17344 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
17345 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17346 DAG.getIntPtrConstant(4, dl));
17347 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
17348 LD->getMemOperand(), 4, 4);
17349 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
17350 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
17351 SDValue Res;
17352 if (Subtarget.isLittleEndian())
17353 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
17354 else
17355 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
17356 SDValue TF =
17357 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17358 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
17359 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
17360 return Res;
17361 }
17362 case PPCISD::VCMP:
17363 // If a VCMP_rec node already exists with exactly the same operands as this
17364 // node, use its result instead of this node (VCMP_rec computes both a CR6
17365 // and a normal output).
17366 //
17367 if (!N->getOperand(0).hasOneUse() &&
17368 !N->getOperand(1).hasOneUse() &&
17369 !N->getOperand(2).hasOneUse()) {
17370
17371 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17372 SDNode *VCMPrecNode = nullptr;
17373
17374 SDNode *LHSN = N->getOperand(0).getNode();
17375 for (SDNode *User : LHSN->users())
17376 if (User->getOpcode() == PPCISD::VCMP_rec &&
17377 User->getOperand(1) == N->getOperand(1) &&
17378 User->getOperand(2) == N->getOperand(2) &&
17379 User->getOperand(0) == N->getOperand(0)) {
17380 VCMPrecNode = User;
17381 break;
17382 }
17383
17384 // If there is no VCMP_rec node, or if the flag value has a single use,
17385 // don't transform this.
17386 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
17387 break;
17388
17389 // Look at the (necessarily single) use of the flag value. If it has a
17390 // chain, this transformation is more complex. Note that multiple things
17391 // could use the value result, which we should ignore.
17392 SDNode *FlagUser = nullptr;
17393 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17394 FlagUser == nullptr; ++UI) {
17395 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17396 SDNode *User = UI->getUser();
17397 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17398 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
17399 FlagUser = User;
17400 break;
17401 }
17402 }
17403 }
17404
17405 // If the user is a MFOCRF instruction, we know this is safe.
17406 // Otherwise we give up for right now.
17407 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17408 return SDValue(VCMPrecNode, 0);
17409 }
17410 break;
17411 case ISD::BR_CC: {
17412 // If this is a branch on an altivec predicate comparison, lower this so
17413 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17414 // lowering is done pre-legalize, because the legalizer lowers the predicate
17415 // compare down to code that is difficult to reassemble.
17416 // This code also handles branches that depend on the result of a store
17417 // conditional.
17418 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17419 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
17420
17421 int CompareOpc;
17422 bool isDot;
17423
17424 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17425 break;
17426
17427 // Since we are doing this pre-legalize, the RHS can be a constant of
17428 // arbitrary bitwidth which may cause issues when trying to get the value
17429 // from the underlying APInt.
17430 auto RHSAPInt = RHS->getAsAPIntVal();
17431 if (!RHSAPInt.isIntN(64))
17432 break;
17433
17434 unsigned Val = RHSAPInt.getZExtValue();
17435 auto isImpossibleCompare = [&]() {
17436 // If this is a comparison against something other than 0/1, then we know
17437 // that the condition is never/always true.
17438 if (Val != 0 && Val != 1) {
17439 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17440 return N->getOperand(0);
17441 // Always !=, turn it into an unconditional branch.
17442 return DAG.getNode(ISD::BR, dl, MVT::Other,
17443 N->getOperand(0), N->getOperand(4));
17444 }
17445 return SDValue();
17446 };
17447 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17448 unsigned StoreWidth = 0;
17449 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17450 isStoreConditional(LHS, StoreWidth)) {
17451 if (SDValue Impossible = isImpossibleCompare())
17452 return Impossible;
17453 PPC::Predicate CompOpc;
17454 // eq 0 => ne
17455 // ne 0 => eq
17456 // eq 1 => eq
17457 // ne 1 => ne
17458 if (Val == 0)
17459 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17460 else
17461 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17462
17463 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
17464 DAG.getConstant(StoreWidth, dl, MVT::i32)};
17465 auto *MemNode = cast<MemSDNode>(LHS);
17466 SDValue ConstSt = DAG.getMemIntrinsicNode(
17467 PPCISD::STORE_COND, dl,
17468 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
17469 MemNode->getMemoryVT(), MemNode->getMemOperand());
17470
17471 SDValue InChain;
17472 // Unchain the branch from the original store conditional.
17473 if (N->getOperand(0) == LHS.getValue(1))
17474 InChain = LHS.getOperand(0);
17475 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
17476 SmallVector<SDValue, 4> InChains;
17477 SDValue InTF = N->getOperand(0);
17478 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17479 if (InTF.getOperand(i) != LHS.getValue(1))
17480 InChains.push_back(InTF.getOperand(i));
17481 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
17482 }
17483
17484 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
17485 DAG.getConstant(CompOpc, dl, MVT::i32),
17486 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
17487 ConstSt.getValue(2));
17488 }
17489
17490 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17491 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
17492 assert(isDot && "Can't compare against a vector result!");
17493
17494 if (SDValue Impossible = isImpossibleCompare())
17495 return Impossible;
17496
17497 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17498 // Create the PPCISD altivec 'dot' comparison node.
17499 SDValue Ops[] = {
17500 LHS.getOperand(2), // LHS of compare
17501 LHS.getOperand(3), // RHS of compare
17502 DAG.getConstant(CompareOpc, dl, MVT::i32)
17503 };
17504 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
17505 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
17506
17507 // Unpack the result based on how the target uses it.
17508 PPC::Predicate CompOpc;
17509 switch (LHS.getConstantOperandVal(1)) {
17510 default: // Can't happen, don't crash on invalid number though.
17511 case 0: // Branch on the value of the EQ bit of CR6.
17512 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17513 break;
17514 case 1: // Branch on the inverted value of the EQ bit of CR6.
17515 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17516 break;
17517 case 2: // Branch on the value of the LT bit of CR6.
17518 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17519 break;
17520 case 3: // Branch on the inverted value of the LT bit of CR6.
17521 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17522 break;
17523 }
17524
17525 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
17526 DAG.getConstant(CompOpc, dl, MVT::i32),
17527 DAG.getRegister(PPC::CR6, MVT::i32),
17528 N->getOperand(4), CompNode.getValue(1));
17529 }
17530 break;
17531 }
17532 case ISD::BUILD_VECTOR:
17533 return DAGCombineBuildVector(N, DCI);
17534 case PPCISD::ADDC:
17535 return DAGCombineAddc(N, DCI);
17536 }
17537
17538 return SDValue();
17539}
17540
17541SDValue
17542 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17543 SelectionDAG &DAG,
17544 SmallVectorImpl<SDNode *> &Created) const {
17545 // fold (sdiv X, pow2)
17546 EVT VT = N->getValueType(0);
17547 if (VT == MVT::i64 && !Subtarget.isPPC64())
17548 return SDValue();
17549 if ((VT != MVT::i32 && VT != MVT::i64) ||
17550 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17551 return SDValue();
17552
17553 SDLoc DL(N);
17554 SDValue N0 = N->getOperand(0);
17555
17556 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17557 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17558 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
17559
17560 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
17561 Created.push_back(Op.getNode());
17562
17563 if (IsNegPow2) {
17564 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
17565 Created.push_back(Op.getNode());
17566 }
17567
17568 return Op;
17569}
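// Illustrative example (a selection sketch, not emitted by this function): for
// a 32-bit 'sdiv %x, 4' the PPCISD::SRA_ADDZE node built above is intended to
// select to the classic round-toward-zero idiom
//   srawi r3, r3, 2
//   addze r3, r3
// with an additional 'neg' appended when the divisor is a negated power of two.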
17570
17571//===----------------------------------------------------------------------===//
17572// Inline Assembly Support
17573//===----------------------------------------------------------------------===//
17574
17575 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17576 KnownBits &Known,
17577 const APInt &DemandedElts,
17578 const SelectionDAG &DAG,
17579 unsigned Depth) const {
17580 Known.resetAll();
17581 switch (Op.getOpcode()) {
17582 default: break;
17583 case PPCISD::LBRX: {
17584 // lhbrx is known to have the top bits cleared out.
17585 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
17586 Known.Zero = 0xFFFF0000;
17587 break;
17588 }
17589 case PPCISD::ADDE: {
17590 if (Op.getResNo() == 0) {
17591 // (0|1), _ = ADDE 0, 0, CARRY
17592 SDValue LHS = Op.getOperand(0);
17593 SDValue RHS = Op.getOperand(1);
17594 if (isNullConstant(LHS) && isNullConstant(RHS))
17595 Known.Zero = ~1ULL;
17596 }
17597 break;
17598 }
17599 case ISD::INTRINSIC_WO_CHAIN: {
17600 switch (Op.getConstantOperandVal(0)) {
17601 default: break;
17602 case Intrinsic::ppc_altivec_vcmpbfp_p:
17603 case Intrinsic::ppc_altivec_vcmpeqfp_p:
17604 case Intrinsic::ppc_altivec_vcmpequb_p:
17605 case Intrinsic::ppc_altivec_vcmpequh_p:
17606 case Intrinsic::ppc_altivec_vcmpequw_p:
17607 case Intrinsic::ppc_altivec_vcmpequd_p:
17608 case Intrinsic::ppc_altivec_vcmpequq_p:
17609 case Intrinsic::ppc_altivec_vcmpgefp_p:
17610 case Intrinsic::ppc_altivec_vcmpgtfp_p:
17611 case Intrinsic::ppc_altivec_vcmpgtsb_p:
17612 case Intrinsic::ppc_altivec_vcmpgtsh_p:
17613 case Intrinsic::ppc_altivec_vcmpgtsw_p:
17614 case Intrinsic::ppc_altivec_vcmpgtsd_p:
17615 case Intrinsic::ppc_altivec_vcmpgtsq_p:
17616 case Intrinsic::ppc_altivec_vcmpgtub_p:
17617 case Intrinsic::ppc_altivec_vcmpgtuh_p:
17618 case Intrinsic::ppc_altivec_vcmpgtuw_p:
17619 case Intrinsic::ppc_altivec_vcmpgtud_p:
17620 case Intrinsic::ppc_altivec_vcmpgtuq_p:
17621 Known.Zero = ~1U; // All bits but the low one are known to be zero.
17622 break;
17623 }
17624 break;
17625 }
17626 case ISD::INTRINSIC_W_CHAIN: {
17627 switch (Op.getConstantOperandVal(1)) {
17628 default:
17629 break;
17630 case Intrinsic::ppc_load2r:
17631 // Top bits are cleared for load2r (which is the same as lhbrx).
17632 Known.Zero = 0xFFFF0000;
17633 break;
17634 }
17635 break;
17636 }
17637 }
17638}
17639
17640 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
17641 switch (Subtarget.getCPUDirective()) {
17642 default: break;
17643 case PPC::DIR_970:
17644 case PPC::DIR_PWR4:
17645 case PPC::DIR_PWR5:
17646 case PPC::DIR_PWR5X:
17647 case PPC::DIR_PWR6:
17648 case PPC::DIR_PWR6X:
17649 case PPC::DIR_PWR7:
17650 case PPC::DIR_PWR8:
17651 case PPC::DIR_PWR9:
17652 case PPC::DIR_PWR10:
17653 case PPC::DIR_PWR11:
17654 case PPC::DIR_PWR_FUTURE: {
17655 if (!ML)
17656 break;
17657
17658 if (!DisableInnermostLoopAlign32) {
17659 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17660 // so that we can decrease cache misses and branch-prediction misses.
17661 // Actual alignment of the loop will depend on the hotness check and other
17662 // logic in alignBlocks.
17663 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17664 return Align(32);
17665 }
17666
17667 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17668
17669 // For small loops (between 5 and 8 instructions), align to a 32-byte
17670 // boundary so that the entire loop fits in one instruction-cache line.
17671 uint64_t LoopSize = 0;
17672 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17673 for (const MachineInstr &J : **I) {
17674 LoopSize += TII->getInstSizeInBytes(J);
17675 if (LoopSize > 32)
17676 break;
17677 }
17678
17679 if (LoopSize > 16 && LoopSize <= 32)
17680 return Align(32);
17681
17682 break;
17683 }
17684 }
17685
17686 return TargetLowering::getPrefLoopAlignment(ML);
17687}
17688
17689/// getConstraintType - Given a constraint, return the type of
17690/// constraint it is for this target.
17691 PPCTargetLowering::ConstraintType
17692 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17693 if (Constraint.size() == 1) {
17694 switch (Constraint[0]) {
17695 default: break;
17696 case 'b':
17697 case 'r':
17698 case 'f':
17699 case 'd':
17700 case 'v':
17701 case 'y':
17702 return C_RegisterClass;
17703 case 'Z':
17704 // FIXME: While Z does indicate a memory constraint, it specifically
17705 // indicates an r+r address (used in conjunction with the 'y' modifier
17706 // in the replacement string). Currently, we're forcing the base
17707 // register to be r0 in the asm printer (which is interpreted as zero)
17708 // and forming the complete address in the second register. This is
17709 // suboptimal.
17710 return C_Memory;
17711 }
17712 } else if (Constraint == "wc") { // individual CR bits.
17713 return C_RegisterClass;
17714 } else if (Constraint == "wa" || Constraint == "wd" ||
17715 Constraint == "wf" || Constraint == "ws" ||
17716 Constraint == "wi" || Constraint == "ww") {
17717 return C_RegisterClass; // VSX registers.
17718 }
17719 return TargetLowering::getConstraintType(Constraint);
17720}
17721
17722/// Examine constraint type and operand type and determine a weight value.
17723/// This object must already have been set up with the operand type
17724/// and the current alternative constraint selected.
17725 TargetLowering::ConstraintWeight
17726 PPCTargetLowering::getSingleConstraintMatchWeight(
17727 AsmOperandInfo &info, const char *constraint) const {
17728 ConstraintWeight weight = CW_Invalid;
17729 Value *CallOperandVal = info.CallOperandVal;
17730 // If we don't have a value, we can't do a match,
17731 // but allow it at the lowest weight.
17732 if (!CallOperandVal)
17733 return CW_Default;
17734 Type *type = CallOperandVal->getType();
17735
17736 // Look at the constraint type.
17737 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
17738 return CW_Register; // an individual CR bit.
17739 else if ((StringRef(constraint) == "wa" ||
17740 StringRef(constraint) == "wd" ||
17741 StringRef(constraint) == "wf") &&
17742 type->isVectorTy())
17743 return CW_Register;
17744 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
17745 return CW_Register; // 'wi' registers just hold 64-bit integer data.
17746 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17747 return CW_Register;
17748 else if (StringRef(constraint) == "ww" && type->isFloatTy())
17749 return CW_Register;
17750
17751 switch (*constraint) {
17752 default:
17753 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17754 break;
17755 case 'b':
17756 if (type->isIntegerTy())
17757 weight = CW_Register;
17758 break;
17759 case 'f':
17760 if (type->isFloatTy())
17761 weight = CW_Register;
17762 break;
17763 case 'd':
17764 if (type->isDoubleTy())
17765 weight = CW_Register;
17766 break;
17767 case 'v':
17768 if (type->isVectorTy())
17769 weight = CW_Register;
17770 break;
17771 case 'y':
17772 weight = CW_Register;
17773 break;
17774 case 'Z':
17775 weight = CW_Memory;
17776 break;
17777 }
17778 return weight;
17779}
17780
17781std::pair<unsigned, const TargetRegisterClass *>
17782 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17783 StringRef Constraint,
17784 MVT VT) const {
17785 if (Constraint.size() == 1) {
17786 // GCC RS6000 Constraint Letters
17787 switch (Constraint[0]) {
17788 case 'b': // R1-R31
17789 if (VT == MVT::i64 && Subtarget.isPPC64())
17790 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
17791 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
17792 case 'r': // R0-R31
17793 if (VT == MVT::i64 && Subtarget.isPPC64())
17794 return std::make_pair(0U, &PPC::G8RCRegClass);
17795 return std::make_pair(0U, &PPC::GPRCRegClass);
17796 // 'd' and 'f' constraints are both defined to be "the floating point
17797 // registers", where one is for 32-bit and the other for 64-bit. We don't
17798 // really care overly much here so just give them all the same reg classes.
17799 case 'd':
17800 case 'f':
17801 if (Subtarget.hasSPE()) {
17802 if (VT == MVT::f32 || VT == MVT::i32)
17803 return std::make_pair(0U, &PPC::GPRCRegClass);
17804 if (VT == MVT::f64 || VT == MVT::i64)
17805 return std::make_pair(0U, &PPC::SPERCRegClass);
17806 } else {
17807 if (VT == MVT::f32 || VT == MVT::i32)
17808 return std::make_pair(0U, &PPC::F4RCRegClass);
17809 if (VT == MVT::f64 || VT == MVT::i64)
17810 return std::make_pair(0U, &PPC::F8RCRegClass);
17811 }
17812 break;
17813 case 'v':
17814 if (Subtarget.hasAltivec() && VT.isVector())
17815 return std::make_pair(0U, &PPC::VRRCRegClass);
17816 else if (Subtarget.hasVSX())
17817 // Scalars in Altivec registers only make sense with VSX.
17818 return std::make_pair(0U, &PPC::VFRCRegClass);
17819 break;
17820 case 'y': // crrc
17821 return std::make_pair(0U, &PPC::CRRCRegClass);
17822 }
17823 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17824 // An individual CR bit.
17825 return std::make_pair(0U, &PPC::CRBITRCRegClass);
17826 } else if ((Constraint == "wa" || Constraint == "wd" ||
17827 Constraint == "wf" || Constraint == "wi") &&
17828 Subtarget.hasVSX()) {
17829 // A VSX register for either a scalar (FP) or vector. There is no
17830 // support for single precision scalars on subtargets prior to Power8.
17831 if (VT.isVector())
17832 return std::make_pair(0U, &PPC::VSRCRegClass);
17833 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17834 return std::make_pair(0U, &PPC::VSSRCRegClass);
17835 return std::make_pair(0U, &PPC::VSFRCRegClass);
17836 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17837 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17838 return std::make_pair(0U, &PPC::VSSRCRegClass);
17839 else
17840 return std::make_pair(0U, &PPC::VSFRCRegClass);
17841 } else if (Constraint == "lr") {
17842 if (VT == MVT::i64)
17843 return std::make_pair(0U, &PPC::LR8RCRegClass);
17844 else
17845 return std::make_pair(0U, &PPC::LRRCRegClass);
17846 }
17847
17848 // Handle special cases of physical registers that are not properly handled
17849 // by the base class.
17850 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17851 // If we name a VSX register, we can't defer to the base class because it
17852 // will not recognize the correct register (their names will be VSL{0-31}
17853 // and V{0-31} so they won't match). So we match them here.
17854 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
17855 int VSNum = atoi(Constraint.data() + 3);
17856 assert(VSNum >= 0 && VSNum <= 63 &&
17857 "Attempted to access a vsr out of range");
17858 if (VSNum < 32)
17859 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
17860 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
17861 }
17862
17863 // For float registers, we can't defer to the base class as it will match
17864 // the SPILLTOVSRRC class.
17865 if (Constraint.size() > 3 && Constraint[1] == 'f') {
17866 int RegNum = atoi(Constraint.data() + 2);
17867 if (RegNum > 31 || RegNum < 0)
17868 report_fatal_error("Invalid floating point register number");
17869 if (VT == MVT::f32 || VT == MVT::i32)
17870 return Subtarget.hasSPE()
17871 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
17872 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
17873 if (VT == MVT::f64 || VT == MVT::i64)
17874 return Subtarget.hasSPE()
17875 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
17876 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
17877 }
17878 }
17879
17880 std::pair<unsigned, const TargetRegisterClass *> R =
17881 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
17882
17883 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17884 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17885 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17886 // register.
17887 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17888 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
17889 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17890 PPC::GPRCRegClass.contains(R.first))
17891 return std::make_pair(TRI->getMatchingSuperReg(R.first,
17892 PPC::sub_32, &PPC::G8RCRegClass),
17893 &PPC::G8RCRegClass);
17894
17895 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17896 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
17897 R.first = PPC::CR0;
17898 R.second = &PPC::CRRCRegClass;
17899 }
17900 // FIXME: This warning should ideally be emitted in the front end.
17901 const auto &TM = getTargetMachine();
17902 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17903 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17904 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17905 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
17906 errs() << "warning: vector registers 20 to 32 are reserved in the "
17907 "default AIX AltiVec ABI and cannot be used\n";
17908 }
17909
17910 return R;
17911}
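// For reference, typical (illustrative) uses of the constraints handled above
// from C inline assembly look roughly like:
//   __asm__("add %0,%1,%2"   : "=r"(d)  : "r"(a), "b"(idx));  // GPRs, 'b' excludes r0
//   __asm__("lxvd2x %x0,%y1" : "=wa"(v) : "Z"(*src));         // VSX reg + r+r memory
// The asm strings themselves are only examples; the point is which register
// class each constraint letter selects.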
17912
17913/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17914/// vector. If it is invalid, don't add anything to Ops.
17915 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
17916 StringRef Constraint,
17917 std::vector<SDValue> &Ops,
17918 SelectionDAG &DAG) const {
17919 SDValue Result;
17920
17921 // Only support length 1 constraints.
17922 if (Constraint.size() > 1)
17923 return;
17924
17925 char Letter = Constraint[0];
17926 switch (Letter) {
17927 default: break;
17928 case 'I':
17929 case 'J':
17930 case 'K':
17931 case 'L':
17932 case 'M':
17933 case 'N':
17934 case 'O':
17935 case 'P': {
17936 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
17937 if (!CST) return; // Must be an immediate to match.
17938 SDLoc dl(Op);
17939 int64_t Value = CST->getSExtValue();
17940 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17941 // numbers are printed as such.
17942 switch (Letter) {
17943 default: llvm_unreachable("Unknown constraint letter!");
17944 case 'I': // "I" is a signed 16-bit constant.
17945 if (isInt<16>(Value))
17946 Result = DAG.getTargetConstant(Value, dl, TCVT);
17947 break;
17948 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17949 if (isShiftedUInt<16, 16>(Value))
17950 Result = DAG.getTargetConstant(Value, dl, TCVT);
17951 break;
17952 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17953 if (isShiftedInt<16, 16>(Value))
17954 Result = DAG.getTargetConstant(Value, dl, TCVT);
17955 break;
17956 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17957 if (isUInt<16>(Value))
17958 Result = DAG.getTargetConstant(Value, dl, TCVT);
17959 break;
17960 case 'M': // "M" is a constant that is greater than 31.
17961 if (Value > 31)
17962 Result = DAG.getTargetConstant(Value, dl, TCVT);
17963 break;
17964 case 'N': // "N" is a positive constant that is an exact power of two.
17965 if (Value > 0 && isPowerOf2_64(Value))
17966 Result = DAG.getTargetConstant(Value, dl, TCVT);
17967 break;
17968 case 'O': // "O" is the constant zero.
17969 if (Value == 0)
17970 Result = DAG.getTargetConstant(Value, dl, TCVT);
17971 break;
17972 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17973 if (isInt<16>(-Value))
17974 Result = DAG.getTargetConstant(Value, dl, TCVT);
17975 break;
17976 }
17977 break;
17978 }
17979 }
17980
17981 if (Result.getNode()) {
17982 Ops.push_back(Result);
17983 return;
17984 }
17985
17986 // Handle standard constraint letters.
17987 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17988}
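// Example (illustrative) of the immediate letters above: in
//   __asm__("addi %0,%1,%2" : "=r"(r) : "r"(x), "I"(100));
// the operand 100 is accepted because it is a signed 16-bit value; "K" would
// also accept it (only low-order 16 bits set), while "J" would reject it
// because its low 16 bits are not zero.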
17989
17990 void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17991 SmallVectorImpl<SDValue> &Ops,
17992 SelectionDAG &DAG) const {
17993 if (I.getNumOperands() <= 1)
17994 return;
17995 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17996 return;
17997 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17998 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17999 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18000 return;
18001
18002 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18003 Ops.push_back(DAG.getMDNode(MDN));
18004}
18005
18006// isLegalAddressingMode - Return true if the addressing mode represented
18007// by AM is legal for this target, for a load/store of the specified type.
18008 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18009 const AddrMode &AM, Type *Ty,
18010 unsigned AS,
18011 Instruction *I) const {
18012 // Vector type r+i form is supported since power9 as DQ form. We don't check
18013 // the offset matching DQ form requirement (off % 16 == 0), because on PowerPC,
18014 // imm form is preferred and the offset can be adjusted to use imm form later
18015 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18016 // max offset to check legal addressing mode, we should be a little aggressive
18017 // to contain other offsets for that LSRUse.
18018 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18019 return false;
18020
18021 // PPC allows a sign-extended 16-bit immediate field.
18022 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18023 return false;
18024
18025 // No global is ever allowed as a base.
18026 if (AM.BaseGV)
18027 return false;
18028
18029 // PPC only supports r+r,
18030 switch (AM.Scale) {
18031 case 0: // "r+i" or just "i", depending on HasBaseReg.
18032 break;
18033 case 1:
18034 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18035 return false;
18036 // Otherwise we have r+r or r+i.
18037 break;
18038 case 2:
18039 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18040 return false;
18041 // Allow 2*r as r+r.
18042 break;
18043 default:
18044 // No other scales are supported.
18045 return false;
18046 }
18047
18048 return true;
18049}
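// Illustrative AddrMode examples for the rules above (not asm syntax):
//   accepted: {BaseReg}, {BaseReg + 12}, {BaseReg + IndexReg}, {2 * IndexReg}
//   rejected: {BaseReg + IndexReg + 8}, {BaseGV + off}, {BaseReg + 70000}
//             (offset outside the signed 16-bit range), and a vector access
//             {BaseReg + 8} before Power9, where no D/DQ-form vector load exists.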
18050
18051SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18052 SelectionDAG &DAG) const {
18053 MachineFunction &MF = DAG.getMachineFunction();
18054 MachineFrameInfo &MFI = MF.getFrameInfo();
18055 MFI.setReturnAddressIsTaken(true);
18056
18057 SDLoc dl(Op);
18058 unsigned Depth = Op.getConstantOperandVal(0);
18059
18060 // Make sure the function does not optimize away the store of the RA to
18061 // the stack.
18062 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18063 FuncInfo->setLRStoreRequired();
18064 auto PtrVT = getPointerTy(MF.getDataLayout());
18065
18066 if (Depth > 0) {
18067 // The link register (return address) is saved in the caller's frame
18068 // not the callee's stack frame. So we must get the caller's frame
18069 // address and load the return address at the LR offset from there.
18070 SDValue FrameAddr =
18071 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18072 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
18073 SDValue Offset =
18074 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18075 Subtarget.getScalarIntVT());
18076 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18077 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18078 MachinePointerInfo());
18079 }
18080
18081 // Just load the return address off the stack.
18082 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18083 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18084 MachinePointerInfo());
18085}
18086
18087SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18088 SelectionDAG &DAG) const {
18089 SDLoc dl(Op);
18090 unsigned Depth = Op.getConstantOperandVal(0);
18091
18092 MachineFunction &MF = DAG.getMachineFunction();
18093 MachineFrameInfo &MFI = MF.getFrameInfo();
18094 MFI.setFrameAddressIsTaken(true);
18095
18096 EVT PtrVT = getPointerTy(MF.getDataLayout());
18097 bool isPPC64 = PtrVT == MVT::i64;
18098
18099 // Naked functions never have a frame pointer, and so we use r1. For all
18100 // other functions, this decision must be delayed until during PEI.
18101 unsigned FrameReg;
18102 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18103 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18104 else
18105 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18106
18107 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18108 PtrVT);
18109 while (Depth--)
18110 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18111 FrameAddr, MachinePointerInfo());
18112 return FrameAddr;
18113}
18114
18115#define GET_REGISTER_MATCHER
18116#include "PPCGenAsmMatcher.inc"
18117
18118 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
18119 const MachineFunction &MF) const {
18120 bool IsPPC64 = Subtarget.isPPC64();
18121
18122 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18123 if (!Is64Bit && VT != LLT::scalar(32))
18124 report_fatal_error("Invalid register global variable type");
18125
18126 Register Reg = MatchRegisterName(RegName);
18127 if (!Reg)
18128 return Reg;
18129
18130 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18131 // Need followup investigation as to why.
18132 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18133 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18134 StringRef(RegName) + "\"."));
18135
18136 // Convert GPR to GP8R register for 64bit.
18137 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18138 Reg = Reg.id() - PPC::R0 + PPC::X0;
18139
18140 return Reg;
18141}
18142
18143 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18144 // The 32-bit SVR4 ABI accesses everything as got-indirect.
18145 if (Subtarget.is32BitELFABI())
18146 return true;
18147
18148 // AIX accesses everything indirectly through the TOC, which is similar to
18149 // the GOT.
18150 if (Subtarget.isAIXABI())
18151 return true;
18152
18153 CodeModel::Model CModel = getTargetMachine().getCodeModel();
18154 // Under the small or large code model, module locals are accessed
18155 // indirectly by loading their address from .toc/.got.
18156 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18157 return true;
18158
18159 // JumpTable and BlockAddress are accessed as got-indirect.
18160 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
18161 return true;
18162
18163 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
18164 return Subtarget.isGVIndirectSymbol(G->getGlobal());
18165
18166 return false;
18167}
18168
18169bool
18170 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18171 // The PowerPC target isn't yet aware of offsets.
18172 return false;
18173}
18174
18175 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18176 const CallInst &I,
18177 MachineFunction &MF,
18178 unsigned Intrinsic) const {
18179 switch (Intrinsic) {
18180 case Intrinsic::ppc_atomicrmw_xchg_i128:
18181 case Intrinsic::ppc_atomicrmw_add_i128:
18182 case Intrinsic::ppc_atomicrmw_sub_i128:
18183 case Intrinsic::ppc_atomicrmw_nand_i128:
18184 case Intrinsic::ppc_atomicrmw_and_i128:
18185 case Intrinsic::ppc_atomicrmw_or_i128:
18186 case Intrinsic::ppc_atomicrmw_xor_i128:
18187 case Intrinsic::ppc_cmpxchg_i128:
18188 Info.opc = ISD::INTRINSIC_W_CHAIN;
18189 Info.memVT = MVT::i128;
18190 Info.ptrVal = I.getArgOperand(0);
18191 Info.offset = 0;
18192 Info.align = Align(16);
18193 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
18194 MachineMemOperand::MOVolatile;
18195 return true;
18196 case Intrinsic::ppc_atomic_load_i128:
18197 Info.opc = ISD::INTRINSIC_W_CHAIN;
18198 Info.memVT = MVT::i128;
18199 Info.ptrVal = I.getArgOperand(0);
18200 Info.offset = 0;
18201 Info.align = Align(16);
18202 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18203 return true;
18204 case Intrinsic::ppc_atomic_store_i128:
18205 Info.opc = ISD::INTRINSIC_VOID;
18206 Info.memVT = MVT::i128;
18207 Info.ptrVal = I.getArgOperand(2);
18208 Info.offset = 0;
18209 Info.align = Align(16);
18210 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18211 return true;
18212 case Intrinsic::ppc_altivec_lvx:
18213 case Intrinsic::ppc_altivec_lvxl:
18214 case Intrinsic::ppc_altivec_lvebx:
18215 case Intrinsic::ppc_altivec_lvehx:
18216 case Intrinsic::ppc_altivec_lvewx:
18217 case Intrinsic::ppc_vsx_lxvd2x:
18218 case Intrinsic::ppc_vsx_lxvw4x:
18219 case Intrinsic::ppc_vsx_lxvd2x_be:
18220 case Intrinsic::ppc_vsx_lxvw4x_be:
18221 case Intrinsic::ppc_vsx_lxvl:
18222 case Intrinsic::ppc_vsx_lxvll: {
18223 EVT VT;
18224 switch (Intrinsic) {
18225 case Intrinsic::ppc_altivec_lvebx:
18226 VT = MVT::i8;
18227 break;
18228 case Intrinsic::ppc_altivec_lvehx:
18229 VT = MVT::i16;
18230 break;
18231 case Intrinsic::ppc_altivec_lvewx:
18232 VT = MVT::i32;
18233 break;
18234 case Intrinsic::ppc_vsx_lxvd2x:
18235 case Intrinsic::ppc_vsx_lxvd2x_be:
18236 VT = MVT::v2f64;
18237 break;
18238 default:
18239 VT = MVT::v4i32;
18240 break;
18241 }
18242
18243 Info.opc = ISD::INTRINSIC_W_CHAIN;
18244 Info.memVT = VT;
18245 Info.ptrVal = I.getArgOperand(0);
18246 Info.offset = -VT.getStoreSize()+1;
18247 Info.size = 2*VT.getStoreSize()-1;
18248 Info.align = Align(1);
18249 Info.flags = MachineMemOperand::MOLoad;
18250 return true;
18251 }
18252 case Intrinsic::ppc_altivec_stvx:
18253 case Intrinsic::ppc_altivec_stvxl:
18254 case Intrinsic::ppc_altivec_stvebx:
18255 case Intrinsic::ppc_altivec_stvehx:
18256 case Intrinsic::ppc_altivec_stvewx:
18257 case Intrinsic::ppc_vsx_stxvd2x:
18258 case Intrinsic::ppc_vsx_stxvw4x:
18259 case Intrinsic::ppc_vsx_stxvd2x_be:
18260 case Intrinsic::ppc_vsx_stxvw4x_be:
18261 case Intrinsic::ppc_vsx_stxvl:
18262 case Intrinsic::ppc_vsx_stxvll: {
18263 EVT VT;
18264 switch (Intrinsic) {
18265 case Intrinsic::ppc_altivec_stvebx:
18266 VT = MVT::i8;
18267 break;
18268 case Intrinsic::ppc_altivec_stvehx:
18269 VT = MVT::i16;
18270 break;
18271 case Intrinsic::ppc_altivec_stvewx:
18272 VT = MVT::i32;
18273 break;
18274 case Intrinsic::ppc_vsx_stxvd2x:
18275 case Intrinsic::ppc_vsx_stxvd2x_be:
18276 VT = MVT::v2f64;
18277 break;
18278 default:
18279 VT = MVT::v4i32;
18280 break;
18281 }
18282
18283 Info.opc = ISD::INTRINSIC_VOID;
18284 Info.memVT = VT;
18285 Info.ptrVal = I.getArgOperand(1);
18286 Info.offset = -VT.getStoreSize()+1;
18287 Info.size = 2*VT.getStoreSize()-1;
18288 Info.align = Align(1);
18289 Info.flags = MachineMemOperand::MOStore;
18290 return true;
18291 }
18292 case Intrinsic::ppc_stdcx:
18293 case Intrinsic::ppc_stwcx:
18294 case Intrinsic::ppc_sthcx:
18295 case Intrinsic::ppc_stbcx: {
18296 EVT VT;
18297 auto Alignment = Align(8);
18298 switch (Intrinsic) {
18299 case Intrinsic::ppc_stdcx:
18300 VT = MVT::i64;
18301 break;
18302 case Intrinsic::ppc_stwcx:
18303 VT = MVT::i32;
18304 Alignment = Align(4);
18305 break;
18306 case Intrinsic::ppc_sthcx:
18307 VT = MVT::i16;
18308 Alignment = Align(2);
18309 break;
18310 case Intrinsic::ppc_stbcx:
18311 VT = MVT::i8;
18312 Alignment = Align(1);
18313 break;
18314 }
18315 Info.opc = ISD::INTRINSIC_W_CHAIN;
18316 Info.memVT = VT;
18317 Info.ptrVal = I.getArgOperand(0);
18318 Info.offset = 0;
18319 Info.align = Alignment;
18320 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18321 return true;
18322 }
18323 default:
18324 break;
18325 }
18326
18327 return false;
18328}
18329
18330/// It returns EVT::Other if the type should be determined using generic
18331/// target-independent logic.
18332 EVT PPCTargetLowering::getOptimalMemOpType(
18333 LLVMContext &Context, const MemOp &Op,
18334 const AttributeList &FuncAttributes) const {
18335 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18336 // We should use Altivec/VSX loads and stores when available. For unaligned
18337 // addresses, unaligned VSX loads are only fast starting with the P8.
18338 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18339 if (Op.isMemset() && Subtarget.hasVSX()) {
18340 uint64_t TailSize = Op.size() % 16;
18341 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
18342 // element if vector element type matches tail store. For tail size
18343 // 3/4, the tail store is i32; v4i32 cannot be used, so a legal alternative is needed.
18344 if (TailSize > 2 && TailSize <= 4) {
18345 return MVT::v8i16;
18346 }
18347 return MVT::v4i32;
18348 }
18349 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
18350 return MVT::v4i32;
18351 }
18352 }
18353
18354 if (Subtarget.isPPC64()) {
18355 return MVT::i64;
18356 }
18357
18358 return MVT::i32;
18359}
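// Examples (sketch): a 64-byte memcpy on a Power8 or later subtarget gets
// MVT::v4i32 and is expanded with 16-byte vector ops, a VSX memset whose length
// modulo 16 is 3 or 4 gets MVT::v8i16, and anything smaller than 16 bytes falls
// through to MVT::i64 (PPC64) or MVT::i32.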
18360
18361/// Returns true if it is beneficial to convert a load of a constant
18362/// to just the constant itself.
18363 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
18364 Type *Ty) const {
18365 assert(Ty->isIntegerTy());
18366
18367 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18368 return !(BitSize == 0 || BitSize > 64);
18369}
18370
18371 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
18372 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18373 return false;
18374 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18375 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18376 return NumBits1 == 64 && NumBits2 == 32;
18377}
18378
18379 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
18380 if (!VT1.isInteger() || !VT2.isInteger())
18381 return false;
18382 unsigned NumBits1 = VT1.getSizeInBits();
18383 unsigned NumBits2 = VT2.getSizeInBits();
18384 return NumBits1 == 64 && NumBits2 == 32;
18385}
18386
18387 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
18388 // Generally speaking, zexts are not free, but they are free when they can be
18389 // folded with other operations.
18390 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18391 EVT MemVT = LD->getMemoryVT();
18392 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18393 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18394 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18395 LD->getExtensionType() == ISD::ZEXTLOAD))
18396 return true;
18397 }
18398
18399 // FIXME: Add other cases...
18400 // - 32-bit shifts with a zext to i64
18401 // - zext after ctlz, bswap, etc.
18402 // - zext after and by a constant mask
18403
18404 return TargetLowering::isZExtFree(Val, VT2);
18405}
18406
18407bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18408 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18409 "invalid fpext types");
18410 // Extending to float128 is not free.
18411 if (DestVT == MVT::f128)
18412 return false;
18413 return true;
18414}
18415
18416 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
18417 return isInt<16>(Imm) || isUInt<16>(Imm);
18418}
18419
18420 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
18421 return isInt<16>(Imm) || isUInt<16>(Imm);
18422}
18423
18424 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
18425 MachineMemOperand::Flags,
18426 unsigned *Fast) const {
18427 if (DisablePPCUnaligned)
18428 return false;
18429
18430 // PowerPC supports unaligned memory access for simple non-vector types.
18431 // Although accessing unaligned addresses is not as efficient as accessing
18432 // aligned addresses, it is generally more efficient than manual expansion,
18433 // and generally only traps for software emulation when crossing page
18434 // boundaries.
18435
18436 if (!VT.isSimple())
18437 return false;
18438
18439 if (VT.isFloatingPoint() && !VT.isVector() &&
18440 !Subtarget.allowsUnalignedFPAccess())
18441 return false;
18442
18443 if (VT.getSimpleVT().isVector()) {
18444 if (Subtarget.hasVSX()) {
18445 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18446 VT != MVT::v4f32 && VT != MVT::v4i32)
18447 return false;
18448 } else {
18449 return false;
18450 }
18451 }
18452
18453 if (VT == MVT::ppcf128)
18454 return false;
18455
18456 if (Fast)
18457 *Fast = 1;
18458
18459 return true;
18460}
18461
18462 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
18463 SDValue C) const {
18464 // Check integral scalar types.
18465 if (!VT.isScalarInteger())
18466 return false;
18467 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18468 if (!ConstNode->getAPIntValue().isSignedIntN(64))
18469 return false;
18470 // This transformation will generate >= 2 operations. But the following
18471 // cases will generate <= 2 instructions during ISEL. So exclude them.
18472 // 1. If the constant multiplier fits in 16 bits, it can be handled by one
18473 // HW instruction, i.e. MULLI.
18474 // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
18475 // one extra shift instruction is needed compared to case 1, i.e. MULLI and RLDICR.
18476 int64_t Imm = ConstNode->getSExtValue();
18477 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
18478 Imm >>= Shift;
18479 if (isInt<16>(Imm))
18480 return false;
18481 uint64_t UImm = static_cast<uint64_t>(Imm);
18482 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
18483 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
18484 return true;
18485 }
18486 return false;
18487}
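// Worked example (sketch): a multiplier of 65537 (0x10001) does not fit a
// 16-bit MULLI even after stripping trailing zeros, but 65537 - 1 is a power
// of two, so returning true lets the generic combiner rewrite the multiply as
// (x << 16) + x; likewise 65535 becomes (x << 16) - x. Multipliers that MULLI
// (plus at most one RLDICR) can already handle are rejected above.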
18488
18489 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
18490 EVT VT) const {
18491 return isFMAFasterThanFMulAndFAdd(
18492 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
18493}
18494
18495 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
18496 Type *Ty) const {
18497 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18498 return false;
18499 switch (Ty->getScalarType()->getTypeID()) {
18500 case Type::FloatTyID:
18501 case Type::DoubleTyID:
18502 return true;
18503 case Type::FP128TyID:
18504 return Subtarget.hasP9Vector();
18505 default:
18506 return false;
18507 }
18508}
18509
18510// FIXME: add more patterns which are not profitable to hoist.
18511 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
18512 if (!I->hasOneUse())
18513 return true;
18514
18515 Instruction *User = I->user_back();
18516 assert(User && "A single use instruction with no uses.");
18517
18518 switch (I->getOpcode()) {
18519 case Instruction::FMul: {
18520 // Don't break FMA, PowerPC prefers FMA.
18521 if (User->getOpcode() != Instruction::FSub &&
18522 User->getOpcode() != Instruction::FAdd)
18523 return true;
18524
18525 const TargetOptions &Options = getTargetMachine().Options;
18526 const Function *F = I->getFunction();
18527 const DataLayout &DL = F->getDataLayout();
18528 Type *Ty = User->getOperand(0)->getType();
18529
18530 return !(
18531 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18532 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
18533 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
18534 }
18535 case Instruction::Load: {
18536 // Don't break "store (load float*)" pattern, this pattern will be combined
18537 // to "store (load int32)" in later InstCombine pass. See function
18538 // combineLoadToOperationType. On PowerPC, loading a float point takes more
18539 // cycles than loading a 32 bit integer.
18540 LoadInst *LI = cast<LoadInst>(I);
18541 // For the loads that combineLoadToOperationType does nothing, like
18542 // ordered load, it should be profitable to hoist them.
18543 // For swifterror load, it can only be used for pointer to pointer type, so
18544 // later type check should get rid of this case.
18545 if (!LI->isUnordered())
18546 return true;
18547
18548 if (User->getOpcode() != Instruction::Store)
18549 return true;
18550
18551 if (I->getType()->getTypeID() != Type::FloatTyID)
18552 return true;
18553
18554 return false;
18555 }
18556 default:
18557 return true;
18558 }
18559 return true;
18560}
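// Example (illustrative): given 'x = a * b' whose only user is 'y = x + c',
// hoisting the multiply away from the add could prevent the pair from being
// selected as a single fused multiply-add, so the FMul case above reports the
// hoist as unprofitable whenever FMA is fast and legal for that type.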
18561
18562const MCPhysReg *
18563 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
18564 // LR is a callee-save register, but we must treat it as clobbered by any call
18565 // site. Hence we include LR in the scratch registers, which are in turn added
18566 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18567 // to CTR, which is used by any indirect call.
18568 static const MCPhysReg ScratchRegs[] = {
18569 PPC::X12, PPC::LR8, PPC::CTR8, 0
18570 };
18571
18572 return ScratchRegs;
18573}
18574
18575 Register PPCTargetLowering::getExceptionPointerRegister(
18576 const Constant *PersonalityFn) const {
18577 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18578}
18579
18580 Register PPCTargetLowering::getExceptionSelectorRegister(
18581 const Constant *PersonalityFn) const {
18582 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18583}
18584
18585bool
18586 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
18587 EVT VT, unsigned DefinedValues) const {
18588 if (VT == MVT::v2i64)
18589 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18590
18591 if (Subtarget.hasVSX())
18592 return true;
18593
18594 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
18595}
18596
18597 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
18598 if (DisableILPPref || Subtarget.enableMachineScheduler())
18599 return TargetLowering::getSchedulingPreference(N);
18600
18601 return Sched::ILP;
18602}
18603
18604// Create a fast isel object.
18605FastISel *
18606 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
18607 const TargetLibraryInfo *LibInfo) const {
18608 return PPC::createFastISel(FuncInfo, LibInfo);
18609}
18610
18611// 'Inverted' means the FMA opcode after negating one multiplicand.
18612// For example, (fma -a b c) = (fnmsub a b c)
18613static unsigned invertFMAOpcode(unsigned Opc) {
18614 switch (Opc) {
18615 default:
18616 llvm_unreachable("Invalid FMA opcode for PowerPC!");
18617 case ISD::FMA:
18618 return PPCISD::FNMSUB;
18619 case PPCISD::FNMSUB:
18620 return ISD::FMA;
18621 }
18622}
18623
18624 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
18625 bool LegalOps, bool OptForSize,
18626 NegatibleCost &Cost,
18627 unsigned Depth) const {
18628 if (Depth > SelectionDAG::MaxRecursionDepth)
18629 return SDValue();
18630
18631 unsigned Opc = Op.getOpcode();
18632 EVT VT = Op.getValueType();
18633 SDNodeFlags Flags = Op.getNode()->getFlags();
18634
18635 switch (Opc) {
18636 case PPCISD::FNMSUB:
18637 if (!Op.hasOneUse() || !isTypeLegal(VT))
18638 break;
18639
18640 const TargetOptions &Options = getTargetMachine().Options;
18641 SDValue N0 = Op.getOperand(0);
18642 SDValue N1 = Op.getOperand(1);
18643 SDValue N2 = Op.getOperand(2);
18644 SDLoc Loc(Op);
18645
18646 NegatibleCost N2Cost = NegatibleCost::Expensive;
18647 SDValue NegN2 =
18648 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18649
18650 if (!NegN2)
18651 return SDValue();
18652
18653 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18654 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18655 // These transformations may change sign of zeroes. For example,
18656 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18657 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18658 // Try and choose the cheaper one to negate.
18659      NegatibleCost N0Cost = NegatibleCost::Expensive;
18660      SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18661 N0Cost, Depth + 1);
18662
18663      NegatibleCost N1Cost = NegatibleCost::Expensive;
18664      SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18665 N1Cost, Depth + 1);
18666
18667 if (NegN0 && N0Cost <= N1Cost) {
18668 Cost = std::min(N0Cost, N2Cost);
18669 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18670 } else if (NegN1) {
18671 Cost = std::min(N1Cost, N2Cost);
18672 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18673 }
18674 }
18675
18676 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18677 if (isOperationLegal(ISD::FMA, VT)) {
18678 Cost = N2Cost;
18679 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18680 }
18681
18682 break;
18683 }
18684
18685 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18686 Cost, Depth);
18687}
18688
18689// Override to enable LOAD_STACK_GUARD lowering on Linux.
18690 bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
18691  if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18692    return true;
18693  return TargetLowering::useLoadStackGuardNode(M);
18694 }
18695
18696 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
18697                                      bool ForCodeSize) const {
18698 if (!VT.isSimple() || !Subtarget.hasVSX())
18699 return false;
18700
18701 switch(VT.getSimpleVT().SimpleTy) {
18702 default:
18703 // For FP types that are currently not supported by PPC backend, return
18704 // false. Examples: f16, f80.
18705 return false;
18706 case MVT::f32:
18707 case MVT::f64: {
18708 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18709      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18710 return true;
18711 }
18712 bool IsExact;
18713 APSInt IntResult(16, false);
18714 // The rounding mode doesn't really matter because we only care about floats
18715 // that can be converted to integers exactly.
18716 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18717 // For exact values in the range [-16, 15] we can materialize the float.
18718 if (IsExact && IntResult <= 15 && IntResult >= -16)
18719 return true;
18720 return Imm.isZero();
18721 }
18722 case MVT::ppcf128:
18723 return Imm.isPosZero();
18724 }
18725}
18726
18727// For vector shift operation op, fold
18728// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
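// For example (a sketch, for the v4i32 case): (srl x, (and y, splat(31)))
// can become (PPCISD::SRL x, y), because the hardware shift reads only the
// low log2(32) = 5 bits of each shift amount, so the mask is redundant.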
18729 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18730                                   SelectionDAG &DAG) {
18731 SDValue N0 = N->getOperand(0);
18732 SDValue N1 = N->getOperand(1);
18733 EVT VT = N0.getValueType();
18734 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18735 unsigned Opcode = N->getOpcode();
18736 unsigned TargetOpcode;
18737
18738 switch (Opcode) {
18739 default:
18740 llvm_unreachable("Unexpected shift operation");
18741 case ISD::SHL:
18742 TargetOpcode = PPCISD::SHL;
18743 break;
18744 case ISD::SRL:
18745 TargetOpcode = PPCISD::SRL;
18746 break;
18747 case ISD::SRA:
18748 TargetOpcode = PPCISD::SRA;
18749 break;
18750 }
18751
18752 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
18753 N1->getOpcode() == ISD::AND)
18754 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
18755 if (Mask->getZExtValue() == OpSizeInBits - 1)
18756 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
18757
18758 return SDValue();
18759}
18760
18761SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
18762 DAGCombinerInfo &DCI) const {
18763 EVT VT = N->getValueType(0);
18764 assert(VT.isVector() && "Vector type expected.");
18765
18766 unsigned Opc = N->getOpcode();
18767 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
18768 "Unexpected opcode.");
18769
18770 if (!isOperationLegal(Opc, VT))
18771 return SDValue();
18772
18773 EVT EltTy = VT.getScalarType();
18774 unsigned EltBits = EltTy.getSizeInBits();
18775 if (EltTy != MVT::i64 && EltTy != MVT::i32)
18776 return SDValue();
18777
18778 SDValue N1 = N->getOperand(1);
18779 uint64_t SplatBits = 0;
18780 bool AddSplatCase = false;
18781 unsigned OpcN1 = N1.getOpcode();
18782 if (OpcN1 == PPCISD::VADD_SPLAT &&
18784 AddSplatCase = true;
18785 SplatBits = N1.getConstantOperandVal(0);
18786 }
18787
18788 if (!AddSplatCase) {
18789 if (OpcN1 != ISD::BUILD_VECTOR)
18790 return SDValue();
18791
18792 unsigned SplatBitSize;
18793 bool HasAnyUndefs;
18794 APInt APSplatBits, APSplatUndef;
18795 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
18796 bool BVNIsConstantSplat =
18797 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
18798 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
18799 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
18800 return SDValue();
18801 SplatBits = APSplatBits.getZExtValue();
18802 }
18803
18804 SDLoc DL(N);
18805 SDValue N0 = N->getOperand(0);
18806 // PPC vector shifts by word/double look at only the low 5/6 bits of the
18807 // shift vector, which means the max value is 31/63. A shift vector of all
18808 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
18809 // -16 to 15 range.
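// For example (illustrative): for v4i32, a splat shift amount of 31 has all
// ones in the low 5 bits, so it behaves the same as an all-ones splat, which
// is cheap to materialize even though 31 itself is outside the vspltisw range.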
18810 if (SplatBits == (EltBits - 1)) {
18811 unsigned NewOpc;
18812 switch (Opc) {
18813 case ISD::SHL:
18814 NewOpc = PPCISD::SHL;
18815 break;
18816 case ISD::SRL:
18817 NewOpc = PPCISD::SRL;
18818 break;
18819 case ISD::SRA:
18820 NewOpc = PPCISD::SRA;
18821 break;
18822 }
18823 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
18824 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
18825 }
18826
18827 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
18828 return SDValue();
18829
18830 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
18831 // before the BUILD_VECTOR is replaced by a load.
18832 if (EltTy != MVT::i64 || SplatBits != 1)
18833 return SDValue();
18834
18835 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
18836}
18837
18838SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
18839 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18840 return Value;
18841
18842 if (N->getValueType(0).isVector())
18843 return combineVectorShift(N, DCI);
18844
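// Roughly speaking, the remaining combine targets ISA 3.0's extswsli, which
// performs sign-extend-word plus shift-left-immediate in one instruction
// instead of separate extsw and sldi instructions.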
18845 SDValue N0 = N->getOperand(0);
18846 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18847 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
18848 N0.getOpcode() != ISD::SIGN_EXTEND ||
18849 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
18850 N->getValueType(0) != MVT::i64)
18851 return SDValue();
18852
18853 // We can't save an operation here if the value is already extended, and
18854 // the existing shift is easier to combine.
18855 SDValue ExtsSrc = N0.getOperand(0);
18856 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
18857 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
18858 return SDValue();
18859
18860 SDLoc DL(N0);
18861 SDValue ShiftBy = SDValue(CN1, 0);
18862  // We want the shift amount to be i32 on the extswsli, but the shift could
18863  // have an i64 shift amount.
18864 if (ShiftBy.getValueType() == MVT::i64)
18865 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
18866
18867 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
18868 ShiftBy);
18869}
18870
18871SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18872 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18873 return Value;
18874
18875 if (N->getValueType(0).isVector())
18876 return combineVectorShift(N, DCI);
18877
18878 return SDValue();
18879}
18880
18881SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18882 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18883 return Value;
18884
18885 if (N->getValueType(0).isVector())
18886 return combineVectorShift(N, DCI);
18887
18888 return SDValue();
18889}
18890
18891// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18892// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
18893 // When C is zero, the expression (addi Z, -C) simplifies to just Z.
18894// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
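// Informally, why the carry equals the compare result: let T = Z - C (or Z
// itself when C is 0). For the setne form, (addic T, -1) produces a carry
// exactly when T != 0; for the sete form, (subfic T, 0) produces a carry
// exactly when T == 0. addze then adds that carry bit to X.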
18895 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
18896                                  const PPCSubtarget &Subtarget) {
18897 if (!Subtarget.isPPC64())
18898 return SDValue();
18899
18900 SDValue LHS = N->getOperand(0);
18901 SDValue RHS = N->getOperand(1);
18902
18903 auto isZextOfCompareWithConstant = [](SDValue Op) {
18904 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18905 Op.getValueType() != MVT::i64)
18906 return false;
18907
18908 SDValue Cmp = Op.getOperand(0);
18909 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18910 Cmp.getOperand(0).getValueType() != MVT::i64)
18911 return false;
18912
18913 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
18914 int64_t NegConstant = 0 - Constant->getSExtValue();
18915 // Due to the limitations of the addi instruction,
18916 // -C is required to be [-32768, 32767].
18917 return isInt<16>(NegConstant);
18918 }
18919
18920 return false;
18921 };
18922
18923 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18924 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18925
18926 // If there is a pattern, canonicalize a zext operand to the RHS.
18927 if (LHSHasPattern && !RHSHasPattern)
18928 std::swap(LHS, RHS);
18929 else if (!LHSHasPattern && !RHSHasPattern)
18930 return SDValue();
18931
18932 SDLoc DL(N);
18933 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
18934 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
18935 SDValue Cmp = RHS.getOperand(0);
18936 SDValue Z = Cmp.getOperand(0);
18937 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
18938 int64_t NegConstant = 0 - Constant->getSExtValue();
18939
18940 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
18941 default: break;
18942 case ISD::SETNE: {
18943 // when C == 0
18944 // --> addze X, (addic Z, -1).carry
18945 // /
18946 // add X, (zext(setne Z, C))--
18947 // \ when -32768 <= -C <= 32767 && C != 0
18948 // --> addze X, (addic (addi Z, -C), -1).carry
18949 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18950 DAG.getConstant(NegConstant, DL, MVT::i64));
18951 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18952 SDValue Addc =
18953 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
18954 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
18955 DAG.getConstant(0, DL, CarryType));
18956 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
18957 DAG.getConstant(0, DL, MVT::i64),
18958 SDValue(Addc.getNode(), 1));
18959 }
18960 case ISD::SETEQ: {
18961 // when C == 0
18962 // --> addze X, (subfic Z, 0).carry
18963 // /
18964 // add X, (zext(sete Z, C))--
18965 // \ when -32768 <= -C <= 32767 && C != 0
18966 // --> addze X, (subfic (addi Z, -C), 0).carry
18967 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18968 DAG.getConstant(NegConstant, DL, MVT::i64));
18969 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18970 SDValue Subc =
18971 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
18972 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
18973 DAG.getConstant(0, DL, CarryType));
18974 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
18975 DAG.getConstant(1UL, DL, CarryType));
18976 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
18977 DAG.getConstant(0, DL, MVT::i64), Invert);
18978 }
18979 }
18980
18981 return SDValue();
18982}
18983
18984// Transform
18985// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18986// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18987// In this case both C1 and C2 must be known constants.
18988// C1+C2 must fit into a 34 bit signed integer.
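// For example (illustrative): (add (MAT_PCREL_ADDR foo+8), 16) folds to
// (MAT_PCREL_ADDR foo+24), so the whole address can still be materialized
// with a single prefixed, PC-relative instruction.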
18989 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
18990                                            const PPCSubtarget &Subtarget) {
18991 if (!Subtarget.isUsingPCRelativeCalls())
18992 return SDValue();
18993
18994 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
18995 // If we find that node try to cast the Global Address and the Constant.
18996 SDValue LHS = N->getOperand(0);
18997 SDValue RHS = N->getOperand(1);
18998
18999 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19000 std::swap(LHS, RHS);
19001
19002 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19003 return SDValue();
19004
19005 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19006 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
19007 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
19008
19009 // Check that both casts succeeded.
19010 if (!GSDN || !ConstNode)
19011 return SDValue();
19012
19013 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19014 SDLoc DL(GSDN);
19015
19016 // The signed int offset needs to fit in 34 bits.
19017 if (!isInt<34>(NewOffset))
19018 return SDValue();
19019
19020 // The new global address is a copy of the old global address except
19021 // that it has the updated Offset.
19022 SDValue GA =
19023 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19024 NewOffset, GSDN->getTargetFlags());
19025 SDValue MatPCRel =
19026 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19027 return MatPCRel;
19028}
19029
19030SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19031 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19032 return Value;
19033
19034 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19035 return Value;
19036
19037 return SDValue();
19038}
19039
19040// Detect TRUNCATE operations on bitcasts of float128 values.
19041 // What we are looking for here is the situation where we extract a subset
19042// of bits from a 128 bit float.
19043// This can be of two forms:
19044// 1) BITCAST of f128 feeding TRUNCATE
19045// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19046// The reason this is required is because we do not have a legal i128 type
19047// and so we want to prevent having to store the f128 and then reload part
19048// of it.
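// For example (illustrative): (trunc (srl (bitcast f128 %x to i128), 64) to
// i64) becomes an EXTRACT_VECTOR_ELT of (bitcast %x to v2i64), so half of the
// f128 is read directly out of the vector register instead of going through
// a store and reload.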
19049SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19050 DAGCombinerInfo &DCI) const {
19051 // If we are using CRBits then try that first.
19052 if (Subtarget.useCRBits()) {
19053 // Check if CRBits did anything and return that if it did.
19054 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19055 return CRTruncValue;
19056 }
19057
19058 SDLoc dl(N);
19059 SDValue Op0 = N->getOperand(0);
19060
19061 // Looking for a truncate of i128 to i64.
19062 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19063 return SDValue();
19064
19065 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19066
19067 // SRL feeding TRUNCATE.
19068 if (Op0.getOpcode() == ISD::SRL) {
19069 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19070 // The right shift has to be by 64 bits.
19071 if (!ConstNode || ConstNode->getZExtValue() != 64)
19072 return SDValue();
19073
19074 // Switch the element number to extract.
19075 EltToExtract = EltToExtract ? 0 : 1;
19076 // Update Op0 past the SRL.
19077 Op0 = Op0.getOperand(0);
19078 }
19079
19080 // BITCAST feeding a TRUNCATE possibly via SRL.
19081 if (Op0.getOpcode() == ISD::BITCAST &&
19082 Op0.getValueType() == MVT::i128 &&
19083 Op0.getOperand(0).getValueType() == MVT::f128) {
19084 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19085 return DCI.DAG.getNode(
19086 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19087 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19088 }
19089 return SDValue();
19090}
19091
19092SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
19093 SelectionDAG &DAG = DCI.DAG;
19094
19095 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
19096 if (!ConstOpOrElement)
19097 return SDValue();
19098
19099  // An imul is usually smaller than the alternative sequence for a legal type.
19100  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
19101      isOperationLegal(ISD::MUL, N->getValueType(0)))
19102 return SDValue();
19103
19104 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19105 switch (this->Subtarget.getCPUDirective()) {
19106 default:
19107 // TODO: enhance the condition for subtarget before pwr8
19108 return false;
19109 case PPC::DIR_PWR8:
19110 // type mul add shl
19111 // scalar 4 1 1
19112 // vector 7 2 2
19113 return true;
19114 case PPC::DIR_PWR9:
19115 case PPC::DIR_PWR10:
19116 case PPC::DIR_PWR11:
19117    case PPC::DIR_PWR_FUTURE:
19118      //  type        mul     add    shl
19119 // scalar 5 2 2
19120 // vector 7 2 2
19121
19122      // The cycle counts of the relevant operations are shown in the table above.
19123      // Because mul costs 5 (scalar) / 7 (vector) cycles while add/sub/shl all
19124      // cost 2 for both scalar and vector types, the 2-instruction patterns
19125      // (add/sub + shl, 4 cycles total) are always profitable. The 3-instruction
19126      // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 cycles
19127      // (sub + add + shl), so we should only do it for vector types.
19128 return IsAddOne && IsNeg ? VT.isVector() : true;
19129 }
19130 };
19131
19132 EVT VT = N->getValueType(0);
19133 SDLoc DL(N);
19134
19135 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19136 bool IsNeg = MulAmt.isNegative();
19137 APInt MulAmtAbs = MulAmt.abs();
19138
19139 if ((MulAmtAbs - 1).isPowerOf2()) {
19140 // (mul x, 2^N + 1) => (add (shl x, N), x)
19141 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
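// For example (illustrative): (mul x, 9) => (add (shl x, 3), x) since
// 9 = 2^3 + 1, and (mul x, -9) => (sub 0, (add (shl x, 3), x)).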
19142
19143 if (!IsProfitable(IsNeg, true, VT))
19144 return SDValue();
19145
19146 SDValue Op0 = N->getOperand(0);
19147 SDValue Op1 =
19148 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19149 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
19150 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
19151
19152 if (!IsNeg)
19153 return Res;
19154
19155 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
19156 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19157 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19158 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
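// For example (illustrative): (mul x, 7) => (sub (shl x, 3), x) since
// 7 = 2^3 - 1, and (mul x, -7) => (sub x, (shl x, 3)).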
19159
19160 if (!IsProfitable(IsNeg, false, VT))
19161 return SDValue();
19162
19163 SDValue Op0 = N->getOperand(0);
19164 SDValue Op1 =
19165 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19166 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
19167
19168 if (!IsNeg)
19169 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
19170 else
19171 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
19172
19173 } else {
19174 return SDValue();
19175 }
19176}
19177
19178// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
19179// in combiner since we need to check SD flags and other subtarget features.
19180SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19181 DAGCombinerInfo &DCI) const {
19182 SDValue N0 = N->getOperand(0);
19183 SDValue N1 = N->getOperand(1);
19184 SDValue N2 = N->getOperand(2);
19185 SDNodeFlags Flags = N->getFlags();
19186 EVT VT = N->getValueType(0);
19187 SelectionDAG &DAG = DCI.DAG;
19188  const TargetOptions &Options = getTargetMachine().Options;
19189  unsigned Opc = N->getOpcode();
19190  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
19191  bool LegalOps = !DCI.isBeforeLegalizeOps();
19192 SDLoc Loc(N);
19193
19194 if (!isOperationLegal(ISD::FMA, VT))
19195 return SDValue();
19196
19197 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19198 // since (fnmsub a b c)=-0 while c-ab=+0.
19199 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19200 return SDValue();
19201
19202 // (fma (fneg a) b c) => (fnmsub a b c)
19203 // (fnmsub (fneg a) b c) => (fma a b c)
19204 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
19205 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
19206
19207 // (fma a (fneg b) c) => (fnmsub a b c)
19208 // (fnmsub a (fneg b) c) => (fma a b c)
19209 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
19210 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
19211
19212 return SDValue();
19213}
19214
19215bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19216  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
19217 if (!Subtarget.is64BitELFABI())
19218 return false;
19219
19220 // If not a tail call then no need to proceed.
19221 if (!CI->isTailCall())
19222 return false;
19223
19224  // If sibling calls have been disabled and tail-calls aren't guaranteed,
19225 // there is no reason to duplicate.
19226 auto &TM = getTargetMachine();
19227 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19228 return false;
19229
19230 // Can't tail call a function called indirectly, or if it has variadic args.
19231 const Function *Callee = CI->getCalledFunction();
19232 if (!Callee || Callee->isVarArg())
19233 return false;
19234
19235 // Make sure the callee and caller calling conventions are eligible for tco.
19236 const Function *Caller = CI->getParent()->getParent();
19237 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19238 CI->getCallingConv()))
19239 return false;
19240
19241  // If the function is local, then we have a good chance at tail-calling it.
19242 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19243}
19244
19245bool PPCTargetLowering::
19246isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19247 const Value *Mask = AndI.getOperand(1);
19248 // If the mask is suitable for andi. or andis. we should sink the and.
19249 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19250 // Can't handle constants wider than 64-bits.
19251 if (CI->getBitWidth() > 64)
19252 return false;
19253 int64_t ConstVal = CI->getZExtValue();
19254 return isUInt<16>(ConstVal) ||
19255 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19256 }
19257
19258 // For non-constant masks, we can always use the record-form and.
19259 return true;
19260}
19261
19262/// getAddrModeForFlags - Based on the set of address flags, select the most
19263/// optimal instruction format to match by.
19264PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19265 // This is not a node we should be handling here.
19266 if (Flags == PPC::MOF_None)
19267 return PPC::AM_None;
19268 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19269 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
19270 if ((Flags & FlagSet) == FlagSet)
19271 return PPC::AM_DForm;
19272 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
19273 if ((Flags & FlagSet) == FlagSet)
19274 return PPC::AM_DSForm;
19275 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
19276 if ((Flags & FlagSet) == FlagSet)
19277 return PPC::AM_DQForm;
19278 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
19279 if ((Flags & FlagSet) == FlagSet)
19280 return PPC::AM_PrefixDForm;
19281 // If no other forms are selected, return an X-Form as it is the most
19282 // general addressing mode.
19283 return PPC::AM_XForm;
19284}
19285
19286/// Set alignment flags based on whether or not the Frame Index is aligned.
19287/// Utilized when computing flags for address computation when selecting
19288/// load and store instructions.
19289static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19290 SelectionDAG &DAG) {
19291 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19292 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
19293 if (!FI)
19294 return;
19295  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
19296  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
19297 // If this is (add $FI, $S16Imm), the alignment flags are already set
19298 // based on the immediate. We just need to clear the alignment flags
19299 // if the FI alignment is weaker.
19300 if ((FrameIndexAlign % 4) != 0)
19301 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19302 if ((FrameIndexAlign % 16) != 0)
19303 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19304 // If the address is a plain FrameIndex, set alignment flags based on
19305 // FI alignment.
19306 if (!IsAdd) {
19307 if ((FrameIndexAlign % 4) == 0)
19308 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19309 if ((FrameIndexAlign % 16) == 0)
19310 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19311 }
19312}
19313
19314/// Given a node, compute flags that are used for address computation when
19315/// selecting load and store instructions. The flags computed are stored in
19316/// FlagSet. This function takes into account whether the node is a constant,
19317 /// an ADD, or an OR, and computes the address flags accordingly.
19318static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19319 SelectionDAG &DAG) {
19320 // Set the alignment flags for the node depending on if the node is
19321 // 4-byte or 16-byte aligned.
19322 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19323 if ((Imm & 0x3) == 0)
19324 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19325 if ((Imm & 0xf) == 0)
19326 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19327 };
19328
19329 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
19330 // All 32-bit constants can be computed as LIS + Disp.
19331 const APInt &ConstImm = CN->getAPIntValue();
19332 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
19333 FlagSet |= PPC::MOF_AddrIsSImm32;
19334 SetAlignFlagsForImm(ConstImm.getZExtValue());
19335 setAlignFlagsForFI(N, FlagSet, DAG);
19336 }
19337 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
19338 FlagSet |= PPC::MOF_RPlusSImm34;
19339 else // Let constant materialization handle large constants.
19340 FlagSet |= PPC::MOF_NotAddNorCst;
19341 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19342 // This address can be represented as an addition of:
19343 // - Register + Imm16 (possibly a multiple of 4/16)
19344 // - Register + Imm34
19345 // - Register + PPCISD::Lo
19346 // - Register + Register
19347 // In any case, we won't have to match this as Base + Zero.
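// For example (illustrative): for (add %X, 20), the signed 16-bit immediate
// sets MOF_RPlusSImm16 (and MOF_RPlusSImm34), and since 20 is a multiple of 4
// but not of 16, MOF_RPlusSImm16Mult4 is set as well, which later allows a
// DS-Form displacement to be used.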
19348 SDValue RHS = N.getOperand(1);
19349 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
19350 const APInt &ConstImm = CN->getAPIntValue();
19351 if (ConstImm.isSignedIntN(16)) {
19352 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19353 SetAlignFlagsForImm(ConstImm.getZExtValue());
19354 setAlignFlagsForFI(N, FlagSet, DAG);
19355 }
19356 if (ConstImm.isSignedIntN(34))
19357 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19358 else
19359 FlagSet |= PPC::MOF_RPlusR; // Register.
19360 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
19361 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19362 else
19363 FlagSet |= PPC::MOF_RPlusR;
19364 } else { // The address computation is not a constant or an addition.
19365 setAlignFlagsForFI(N, FlagSet, DAG);
19366 FlagSet |= PPC::MOF_NotAddNorCst;
19367 }
19368}
19369
19370static bool isPCRelNode(SDValue N) {
19371 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
19372 isValidPCRelNode<ConstantPoolSDNode>(N) ||
19373 isValidPCRelNode<GlobalAddressSDNode>(N) ||
19374 isValidPCRelNode<JumpTableSDNode>(N) ||
19375 isValidPCRelNode<BlockAddressSDNode>(N));
19376}
19377
19378 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
19379/// the address flags of the load/store instruction that is to be matched.
19380unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19381 SelectionDAG &DAG) const {
19382 unsigned FlagSet = PPC::MOF_None;
19383
19384 // Compute subtarget flags.
19385 if (!Subtarget.hasP9Vector())
19386 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19387 else
19388 FlagSet |= PPC::MOF_SubtargetP9;
19389
19390 if (Subtarget.hasPrefixInstrs())
19391 FlagSet |= PPC::MOF_SubtargetP10;
19392
19393 if (Subtarget.hasSPE())
19394 FlagSet |= PPC::MOF_SubtargetSPE;
19395
19396 // Check if we have a PCRel node and return early.
19397 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19398 return FlagSet;
19399
19400  // If the node is one of the paired load/store intrinsics, compute flags for
19401 // address computation and return early.
19402 unsigned ParentOp = Parent->getOpcode();
19403 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19404 (ParentOp == ISD::INTRINSIC_VOID))) {
19405 unsigned ID = Parent->getConstantOperandVal(1);
19406 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19407 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19408 ? Parent->getOperand(2)
19409 : Parent->getOperand(3);
19410 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
19411 FlagSet |= PPC::MOF_Vector;
19412 return FlagSet;
19413 }
19414 }
19415
19416 // Mark this as something we don't want to handle here if it is atomic
19417 // or pre-increment instruction.
19418 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
19419 if (LSB->isIndexed())
19420 return PPC::MOF_None;
19421
19422  // Compute in-memory type flags. This is based on whether the memory type
19423  // is a scalar integer, a float, or a vector.
19424 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
19425 assert(MN && "Parent should be a MemSDNode!");
19426 EVT MemVT = MN->getMemoryVT();
19427 unsigned Size = MemVT.getSizeInBits();
19428 if (MemVT.isScalarInteger()) {
19429 assert(Size <= 128 &&
19430 "Not expecting scalar integers larger than 16 bytes!");
19431 if (Size < 32)
19432 FlagSet |= PPC::MOF_SubWordInt;
19433 else if (Size == 32)
19434 FlagSet |= PPC::MOF_WordInt;
19435 else
19436 FlagSet |= PPC::MOF_DoubleWordInt;
19437 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19438 if (Size == 128)
19439 FlagSet |= PPC::MOF_Vector;
19440 else if (Size == 256) {
19441 assert(Subtarget.pairedVectorMemops() &&
19442 "256-bit vectors are only available when paired vector memops is "
19443 "enabled!");
19444 FlagSet |= PPC::MOF_Vector;
19445 } else
19446 llvm_unreachable("Not expecting illegal vectors!");
19447 } else { // Floating point type: can be scalar, f128 or vector types.
19448 if (Size == 32 || Size == 64)
19449 FlagSet |= PPC::MOF_ScalarFloat;
19450 else if (MemVT == MVT::f128 || MemVT.isVector())
19451 FlagSet |= PPC::MOF_Vector;
19452 else
19453 llvm_unreachable("Not expecting illegal scalar floats!");
19454 }
19455
19456 // Compute flags for address computation.
19457 computeFlagsForAddressComputation(N, FlagSet, DAG);
19458
19459 // Compute type extension flags.
19460 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
19461 switch (LN->getExtensionType()) {
19462 case ISD::SEXTLOAD:
19463 FlagSet |= PPC::MOF_SExt;
19464 break;
19465 case ISD::EXTLOAD:
19466 case ISD::ZEXTLOAD:
19467 FlagSet |= PPC::MOF_ZExt;
19468 break;
19469 case ISD::NON_EXTLOAD:
19470 FlagSet |= PPC::MOF_NoExt;
19471 break;
19472 }
19473 } else
19474 FlagSet |= PPC::MOF_NoExt;
19475
19476 // For integers, no extension is the same as zero extension.
19477 // We set the extension mode to zero extension so we don't have
19478 // to add separate entries in AddrModesMap for loads and stores.
19479 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19480 FlagSet |= PPC::MOF_ZExt;
19481 FlagSet &= ~PPC::MOF_NoExt;
19482 }
19483
19484 // If we don't have prefixed instructions, 34-bit constants should be
19485 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19486 bool IsNonP1034BitConst =
19487      ((PPC::MOF_NotAddNorCst | PPC::MOF_RPlusSImm34 | PPC::MOF_SubtargetP10) &
19488       FlagSet) == PPC::MOF_RPlusSImm34;
19489 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19490 IsNonP1034BitConst)
19491 FlagSet |= PPC::MOF_NotAddNorCst;
19492
19493 return FlagSet;
19494}
19495
19496/// SelectForceXFormMode - Given the specified address, force it to be
19497/// represented as an indexed [r+r] operation (an XForm instruction).
19498 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
19499                                                        SDValue &Base,
19500 SelectionDAG &DAG) const {
19501
19502  PPC::AddrMode Mode = PPC::AM_XForm;
19503  int16_t ForceXFormImm = 0;
19504 if (provablyDisjointOr(DAG, N) &&
19505 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
19506 Disp = N.getOperand(0);
19507 Base = N.getOperand(1);
19508 return Mode;
19509 }
19510
19511 // If the address is the result of an add, we will utilize the fact that the
19512 // address calculation includes an implicit add. However, we can reduce
19513 // register pressure if we do not materialize a constant just for use as the
19514 // index register. We only get rid of the add if it is not an add of a
19515  // value and a 16-bit signed constant where both operands have a single use.
19516 if (N.getOpcode() == ISD::ADD &&
19517 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
19518 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
19519 Disp = N.getOperand(0);
19520 Base = N.getOperand(1);
19521 return Mode;
19522 }
19523
19524 // Otherwise, use R0 as the base register.
19525 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19526 N.getValueType());
19527 Base = N;
19528
19529 return Mode;
19530}
19531
19532 bool PPCTargetLowering::splitValueIntoRegisterParts(
19533     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19534 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19535 EVT ValVT = Val.getValueType();
19536 // If we are splitting a scalar integer into f64 parts (i.e. so they
19537 // can be placed into VFRC registers), we need to zero extend and
19538 // bitcast the values. This will ensure the value is placed into a
19539 // VSR using direct moves or stack operations as needed.
19540 if (PartVT == MVT::f64 &&
19541 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19542 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
19543 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
19544 Parts[0] = Val;
19545 return true;
19546 }
19547 return false;
19548}
19549
19550SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19551 SelectionDAG &DAG) const {
19552 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19553  TargetLowering::CallLoweringInfo CLI(DAG);
19554  EVT RetVT = Op.getValueType();
19555 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
19556 SDValue Callee =
19557 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
19558 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
19559  TargetLowering::ArgListTy Args;
19560  for (const SDValue &N : Op->op_values()) {
19561 EVT ArgVT = N.getValueType();
19562 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
19563 TargetLowering::ArgListEntry Entry(N, ArgTy);
19564 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
19565 Entry.IsZExt = !Entry.IsSExt;
19566 Args.push_back(Entry);
19567 }
19568
19569 SDValue InChain = DAG.getEntryNode();
19570 SDValue TCChain = InChain;
19571 const Function &F = DAG.getMachineFunction().getFunction();
19572 bool isTailCall =
19573 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
19574 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
19575 if (isTailCall)
19576 InChain = TCChain;
19577 CLI.setDebugLoc(SDLoc(Op))
19578 .setChain(InChain)
19579 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
19580 .setTailCall(isTailCall)
19581 .setSExtResult(SignExtend)
19582 .setZExtResult(!SignExtend)
19584 return TLI.LowerCallTo(CLI).first;
19585}
19586
19587SDValue PPCTargetLowering::lowerLibCallBasedOnType(
19588 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
19589 SelectionDAG &DAG) const {
19590 if (Op.getValueType() == MVT::f32)
19591 return lowerToLibCall(LibCallFloatName, Op, DAG);
19592
19593 if (Op.getValueType() == MVT::f64)
19594 return lowerToLibCall(LibCallDoubleName, Op, DAG);
19595
19596 return SDValue();
19597}
19598
19599bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
19600 SDNodeFlags Flags = Op.getNode()->getFlags();
19601 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
19602 Flags.hasNoNaNs() && Flags.hasNoInfs();
19603}
19604
19605bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
19606 return Op.getNode()->getFlags().hasApproximateFuncs();
19607}
19608
19609bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
19611}
19612
19613SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
19614 const char *LibCallFloatName,
19615 const char *LibCallDoubleNameFinite,
19616 const char *LibCallFloatNameFinite,
19617 SDValue Op,
19618 SelectionDAG &DAG) const {
19619 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
19620 return SDValue();
19621
19622 if (!isLowringToMASSFiniteSafe(Op))
19623 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
19624 DAG);
19625
19626 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
19627 LibCallDoubleNameFinite, Op, DAG);
19628}
19629
19630SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
19631 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
19632 "__xl_powf_finite", Op, DAG);
19633}
19634
19635SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
19636 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
19637 "__xl_sinf_finite", Op, DAG);
19638}
19639
19640SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
19641 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
19642 "__xl_cosf_finite", Op, DAG);
19643}
19644
19645SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
19646 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
19647 "__xl_logf_finite", Op, DAG);
19648}
19649
19650SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
19651 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
19652 "__xl_log10f_finite", Op, DAG);
19653}
19654
19655SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
19656 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
19657 "__xl_expf_finite", Op, DAG);
19658}
19659
19660// If we happen to match to an aligned D-Form, check if the Frame Index is
19661// adequately aligned. If it is not, reset the mode to match to X-Form.
19662static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
19663 PPC::AddrMode &Mode) {
19664 if (!isa<FrameIndexSDNode>(N))
19665 return;
19666 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
19667 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
19668 Mode = PPC::AM_XForm;
19669}
19670
19671 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
19672/// compute the address flags of the node, get the optimal address mode based
19673/// on the flags, and set the Base and Disp based on the address mode.
19674 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
19675                                                        SDValue N, SDValue &Disp,
19676 SDValue &Base,
19677 SelectionDAG &DAG,
19678 MaybeAlign Align) const {
19679 SDLoc DL(Parent);
19680
19681 // Compute the address flags.
19682 unsigned Flags = computeMOFlags(Parent, N, DAG);
19683
19684 // Get the optimal address mode based on the Flags.
19685 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
19686
19687 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
19688 // Select an X-Form load if it is not.
19689 setXFormForUnalignedFI(N, Flags, Mode);
19690
19691 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
19692 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
19693 assert(Subtarget.isUsingPCRelativeCalls() &&
19694 "Must be using PC-Relative calls when a valid PC-Relative node is "
19695 "present!");
19696 Mode = PPC::AM_PCRel;
19697 }
19698
19699 // Set Base and Disp accordingly depending on the address mode.
19700 switch (Mode) {
19701 case PPC::AM_DForm:
19702 case PPC::AM_DSForm:
19703 case PPC::AM_DQForm: {
19704 // This is a register plus a 16-bit immediate. The base will be the
19705 // register and the displacement will be the immediate unless it
19706 // isn't sufficiently aligned.
19707 if (Flags & PPC::MOF_RPlusSImm16) {
19708 SDValue Op0 = N.getOperand(0);
19709 SDValue Op1 = N.getOperand(1);
19710 int16_t Imm = Op1->getAsZExtVal();
19711 if (!Align || isAligned(*Align, Imm)) {
19712 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
19713 Base = Op0;
19714 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
19715 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19716 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19717 }
19718 break;
19719 }
19720 }
19721 // This is a register plus the @lo relocation. The base is the register
19722 // and the displacement is the global address.
19723 else if (Flags & PPC::MOF_RPlusLo) {
19724 Disp = N.getOperand(1).getOperand(0); // The global address.
19729 Base = N.getOperand(0);
19730 break;
19731 }
19732 // This is a constant address at most 32 bits. The base will be
19733 // zero or load-immediate-shifted and the displacement will be
19734 // the low 16 bits of the address.
19735 else if (Flags & PPC::MOF_AddrIsSImm32) {
19736 auto *CN = cast<ConstantSDNode>(N);
19737 EVT CNType = CN->getValueType(0);
19738 uint64_t CNImm = CN->getZExtValue();
19739 // If this address fits entirely in a 16-bit sext immediate field, codegen
19740 // this as "d, 0".
19741 int16_t Imm;
19742 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
19743 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
19744 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19745 CNType);
19746 break;
19747 }
19748 // Handle 32-bit sext immediate with LIS + Addr mode.
19749 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
19750 (!Align || isAligned(*Align, CNImm))) {
19751 int32_t Addr = (int32_t)CNImm;
19752 // Otherwise, break this down into LIS + Disp.
19753 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
19754 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
19755 MVT::i32);
19756 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19757 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
19758 break;
19759 }
19760 }
19761    // Otherwise, the PPC::MOF_NotAddNorCst flag is set. The load/store is non-foldable.
19762 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
19763 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
19764 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19765 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19766 } else
19767 Base = N;
19768 break;
19769 }
19770 case PPC::AM_PrefixDForm: {
19771 int64_t Imm34 = 0;
19772 unsigned Opcode = N.getOpcode();
19773 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19774 (isIntS34Immediate(N.getOperand(1), Imm34))) {
19775      // N is an ADD/OR node, and its operand is a 34-bit signed immediate.
19776 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19777 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
19778 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19779 else
19780 Base = N.getOperand(0);
19781 } else if (isIntS34Immediate(N, Imm34)) {
19782 // The address is a 34-bit signed immediate.
19783 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19784 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
19785 }
19786 break;
19787 }
19788 case PPC::AM_PCRel: {
19789 // When selecting PC-Relative instructions, "Base" is not utilized as
19790 // we select the address as [PC+imm].
19791 Disp = N;
19792 break;
19793 }
19794 case PPC::AM_None:
19795 break;
19796 default: { // By default, X-Form is always available to be selected.
19797 // When a frame index is not aligned, we also match by XForm.
19798 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
19799 Base = FI ? N : N.getOperand(1);
19800 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19801 N.getValueType())
19802 : N.getOperand(0);
19803 break;
19804 }
19805 }
19806 return Mode;
19807}
19808
19809 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
19810                                                  bool Return,
19811 bool IsVarArg) const {
19812 switch (CC) {
19813 case CallingConv::Cold:
19814 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
19815 default:
19816 return CC_PPC64_ELF;
19817 }
19818}
19819
19820 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
19821  return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
19822}
19823
19824 TargetLowering::AtomicExpansionKind
19825 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
19826  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19827 if (shouldInlineQuadwordAtomics() && Size == 128)
19828    return AtomicExpansionKind::MaskedIntrinsic;
19829 
19830 switch (AI->getOperation()) {
19836 default:
19838 }
19839
19840 llvm_unreachable("unreachable atomicrmw operation");
19841}
19842
19843 TargetLowering::AtomicExpansionKind
19844 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
19845  unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
19846 if (shouldInlineQuadwordAtomics() && Size == 128)
19849}
19850
19851static Intrinsic::ID
19853 switch (BinOp) {
19854 default:
19855 llvm_unreachable("Unexpected AtomicRMW BinOp");
19857 return Intrinsic::ppc_atomicrmw_xchg_i128;
19858 case AtomicRMWInst::Add:
19859 return Intrinsic::ppc_atomicrmw_add_i128;
19860 case AtomicRMWInst::Sub:
19861 return Intrinsic::ppc_atomicrmw_sub_i128;
19862 case AtomicRMWInst::And:
19863 return Intrinsic::ppc_atomicrmw_and_i128;
19864 case AtomicRMWInst::Or:
19865 return Intrinsic::ppc_atomicrmw_or_i128;
19866 case AtomicRMWInst::Xor:
19867 return Intrinsic::ppc_atomicrmw_xor_i128;
19869 return Intrinsic::ppc_atomicrmw_nand_i128;
19870 }
19871}
19872
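// Rough sketch of the expansion below (for a 128-bit atomicrmw): the i128
// operand is split into two i64 halves, the matching ppc_atomicrmw_*_i128
// intrinsic is called with (addr, lo, hi), and the {lo, hi} result pair is
// reassembled as (zext(hi) << 64) | zext(lo).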
19873 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
19874     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19875 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19876 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19877 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19878 Type *ValTy = Incr->getType();
19879 assert(ValTy->getPrimitiveSizeInBits() == 128);
19880 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19881 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
19882 Value *IncrHi =
19883 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
19884 Value *LoHi = Builder.CreateIntrinsic(
19885      getIntrinsicForAtomicRMWBinOp128(AI->getOperation()),
19886      {AlignedAddr, IncrLo, IncrHi});
19887 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19888 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19889 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19890 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19891 return Builder.CreateOr(
19892 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19893}
19894
19895 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
19896     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19897 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19898 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19899 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19900 Type *ValTy = CmpVal->getType();
19901 assert(ValTy->getPrimitiveSizeInBits() == 128);
19902 Function *IntCmpXchg =
19903 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
19904 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19905 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
19906 Value *CmpHi =
19907 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
19908 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
19909 Value *NewHi =
19910 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
19911 emitLeadingFence(Builder, CI, Ord);
19912 Value *LoHi =
19913 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
19914 emitTrailingFence(Builder, CI, Ord);
19915 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19916 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19917 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19918 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19919 return Builder.CreateOr(
19920 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19921}
19922
19924 return Subtarget.useCRBits();
19925}
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
static constexpr MCPhysReg SPReg
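Several of the helpers above (CalculateStackSlotSize, CalculateStackSlotAlignment, CalculateStackSlotUsed) reserve parameter-save-area space in pointer-sized units. A minimal sketch of the rounding they conceptually perform, with roundUpToPtrSize as a hypothetical stand-in that omits the by-value and vector special cases handled in this file:

  #include <cstdint>

  // Round an argument's in-memory size up to a multiple of the pointer size.
  static uint64_t roundUpToPtrSize(uint64_t ArgSize, unsigned PtrByteSize) {
    return (ArgSize + PtrByteSize - 1) / PtrByteSize * PtrByteSize;
  }
  // e.g. roundUpToPtrSize(12, 8) == 16 on a 64-bit target.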
const SmallVectorImpl< MachineOperand > & Cond
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:6057
bool isDenormal() const
Definition: APFloat.h:1450
APInt bitcastToAPInt() const
Definition: APFloat.h:1353
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1406
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
APInt abs() const
Get the absolute value.
Definition: APInt.h:1795
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1396
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1722
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:482
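A minimal sketch exercising the APInt calls documented above; the widths and bit positions are illustrative only:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  void apintDemo() {
    APInt Mask = APInt::getHighBitsSet(/*numBits=*/32, /*hiBitsSet=*/8); // 0xFF000000
    Mask.setBit(0);                               // also set the lowest bit
    Mask.clearBit(31);                            // drop the topmost bit again
    APInt Wide = Mask.zext(64);                   // zero-extend to 64 bits
    APInt Field = Wide.extractBits(/*numBits=*/8, /*bitPosition=*/24);
    (void)Field.getZExtValue();                   // read the field back as uint64_t
  }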
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:24
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:506
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:721
@ Add
*p = old + v
Definition: Instructions.h:725
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:777
@ Or
*p = old | v
Definition: Instructions.h:733
@ Sub
*p = old - v
Definition: Instructions.h:727
@ And
*p = old & v
Definition: Instructions.h:729
@ Xor
*p = old ^ v
Definition: Instructions.h:735
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:781
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:769
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:773
@ Nand
*p = ~(old & v)
Definition: Instructions.h:731
BinOp getOperation() const
Definition: Instructions.h:819
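A hedged sketch of mapping an atomicrmw operation to a PowerPC quadword atomic intrinsic, in the spirit of the getIntrinsicForAtomicRMWBinOp128 helper listed earlier; the exact set of operations that helper supports is an assumption here:

  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/IntrinsicsPowerPC.h"
  #include "llvm/Support/ErrorHandling.h"
  using namespace llvm;

  static Intrinsic::ID mapRMWToQuadwordIntrinsic(AtomicRMWInst::BinOp Op) {
    switch (Op) {
    case AtomicRMWInst::Xchg: return Intrinsic::ppc_atomicrmw_xchg_i128;
    case AtomicRMWInst::Add:  return Intrinsic::ppc_atomicrmw_add_i128;
    case AtomicRMWInst::Sub:  return Intrinsic::ppc_atomicrmw_sub_i128;
    case AtomicRMWInst::And:  return Intrinsic::ppc_atomicrmw_and_i128;
    case AtomicRMWInst::Or:   return Intrinsic::ppc_atomicrmw_or_i128;
    case AtomicRMWInst::Xor:  return Intrinsic::ppc_atomicrmw_xor_i128;
    case AtomicRMWInst::Nand: return Intrinsic::ppc_atomicrmw_nand_i128;
    default:
      llvm_unreachable("unsupported atomicrmw operation for quadword lowering");
    }
  }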
This is an SDNode representing atomic operations.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:400
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:899
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
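A hedged sketch of how the CCState/CCValAssign results above are typically consumed once argument analysis has run; handleRegArg and handleMemArg are hypothetical callbacks, and ArgLocs would normally be filled by one of CCState's Analyze* methods with a target CCAssignFn:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include <cassert>
  using namespace llvm;

  void walkAssignments(const SmallVectorImpl<CCValAssign> &ArgLocs) {
    for (const CCValAssign &VA : ArgLocs) {
      if (VA.isRegLoc()) {
        Register Reg = VA.getLocReg();      // argument was assigned a register
        (void)Reg;                          // handleRegArg(VA.getValNo(), Reg);
      } else {
        assert(VA.isMemLoc() && "expected a stack location");
        int64_t Off = VA.getLocMemOffset(); // offset into the parameter area
        (void)Off;                          // handleMemArg(VA.getValNo(), Off);
      }
    }
  }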
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1348
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1911
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1406
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1267
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1340
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1273
unsigned arg_size() const
Definition: InstrTypes.h:1290
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:198
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:872
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:850
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:842
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
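A minimal sketch of the DataLayout queries listed above, assuming a type Ty and an LLVMContext are already in hand:

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;

  void layoutQueries(const DataLayout &DL, Type *Ty, LLVMContext &Ctx) {
    bool LE = DL.isLittleEndian();                 // target byte order
    Align A = DL.getABITypeAlign(Ty);              // minimum ABI alignment of Ty
    TypeSize Sz = DL.getTypeAllocSize(Ty);         // size including padding
    IntegerType *IntPtrTy = DL.getIntPtrType(Ctx); // pointer-sized integer type
    (void)LE; (void)A; (void)Sz; (void)IntPtrTy;
  }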
A debug info location.
Definition: DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:706
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:363
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
arg_iterator arg_begin()
Definition: Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:359
size_t arg_size() const
Definition: Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:214
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
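A minimal sketch using the Function queries above to gate a size-oriented lowering decision; the predicate itself is illustrative:

  #include "llvm/IR/Function.h"
  using namespace llvm;

  bool preferCompactLowering(const Function &F) {
    // Favor smaller code when the function is marked optsize/minsize or
    // uses the cold calling convention.
    if (F.hasOptSize() || F.hasMinSize())
      return true;
    return F.getCallingConv() == CallingConv::Cold;
  }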
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:623
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:265
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:269
bool hasHiddenVisibility() const
Definition: GlobalValue.h:252
LLVM_ABI StringRef getSection() const
Definition: Globals.cpp:191
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:638
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:132
bool hasComdat() const
Definition: GlobalValue.h:243
Type * getValueType() const
Definition: GlobalValue.h:298
bool hasProtectedVisibility() const
Definition: GlobalValue.h:253
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2618
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1513
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:562
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:201
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:522
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1492
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2082
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2508
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1599
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2230
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
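A minimal sketch of the IRBuilder calls above: zero-extend two 32-bit values and pack them into one 64-bit word. The value names are illustrative, and the builder inserts at the end of BB:

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  Value *packWords(BasicBlock *BB, Value *Hi32, Value *Lo32) {
    IRBuilder<> Builder(BB);
    Value *Hi = Builder.CreateZExt(Hi32, Builder.getInt64Ty(), "hi.ext");
    Value *Lo = Builder.CreateZExt(Lo32, Builder.getInt64Ty(), "lo.ext");
    Value *HiShifted = Builder.CreateShl(Hi, Builder.getInt64(32), "hi.shl");
    return Builder.CreateOr(HiShifted, Lo, "packed");
  }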
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:43
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:180
bool isUnordered() const
Definition: Instructions.h:253
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:214
Metadata node.
Definition: Metadata.h:1077
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
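A hedged sketch of creating stack objects through MachineFrameInfo, as argument lowering does for by-value copies and spills; the sizes, offset, and alignment are placeholders:

  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  using namespace llvm;

  void makeSlots(MachineFunction &MF) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    // Fixed object at a known offset from the incoming stack pointer, e.g.
    // an argument that lives in the caller's parameter save area.
    int FixedFI = MFI.CreateFixedObject(/*Size=*/8, /*SPOffset=*/48,
                                        /*IsImmutable=*/true);
    // Compiler-placed slot with 16-byte alignment, flagged as a spill slot.
    int SpillFI = MFI.CreateStackObject(/*Size=*/16, Align(16),
                                        /*isSpillSlot=*/true);
    (void)FixedFI; (void)SpillFI;
  }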
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
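A minimal sketch of the MachineInstrBuilder chaining above, using BuildMI; the opcode and registers are passed in rather than assumed, since no concrete PPC opcode is implied here:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"
  using namespace llvm;

  void emitRegPlusImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      const DebugLoc &DL, const TargetInstrInfo *TII,
                      unsigned Opcode, Register Dst, Register Src) {
    // Dst = <Opcode> Src, 16 -- built one operand at a time.
    BuildMI(MBB, I, DL, TII->get(Opcode), Dst)
        .addReg(Src)
        .addImm(16);
  }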
Representation of each machine instruction.
Definition: MachineInstr.h:72
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
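A minimal sketch of the MachineRegisterInfo calls above: create a virtual register of a given class; RC is assumed to be a register class valid for the target:

  #include "llvm/CodeGen/MachineRegisterInfo.h"
  using namespace llvm;

  Register makeTemp(MachineRegisterInfo &MRI, const TargetRegisterClass *RC) {
    Register VReg = MRI.createVirtualRegister(RC);
    // Once an instruction defining VReg has been emitted, it can be
    // recovered with: MachineInstr *DefMI = MRI.getVRegDef(VReg);
    return VReg;
  }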
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:410
bool is32BitELFABI() const
Definition: PPCSubtarget.h:224
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:267
MVT getScalarIntVT() const
Definition: PPCSubtarget.h:254
bool isAIXABI() const
Definition: PPCSubtarget.h:219
bool useSoftFloat() const
Definition: PPCSubtarget.h:179
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:147
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:207
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:261
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:279
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:150
bool isSVR4ABI() const
Definition: PPCSubtarget.h:220
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:139
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:211
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
bool isTargetLinux() const
Definition: PPCSubtarget.h:217
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:285
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:297
bool is64BitELFABI() const
Definition: PPCSubtarget.h:223
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:160
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:303
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:157
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:273
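A hedged sketch of the style in which these PPCSubtarget queries are combined throughout the lowering code; the particular predicate below is illustrative, not a rule taken from this file:

  #include "PPCSubtarget.h"
  using namespace llvm;

  static bool usesTOCBasedGlobalAccess(const PPCSubtarget &Subtarget) {
    // 64-bit ELFv2 and AIX go through the TOC unless PC-relative
    // addressing is available.
    return Subtarget.isPPC64() &&
           (Subtarget.isELFv2ABI() || Subtarget.isAIXABI()) &&
           !Subtarget.isUsingPCRelativeCalls();
  }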
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
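A hedged sketch of the shape of one such override: a target commonly answers isLegalICmpImmediate by checking whether the value fits the immediate field of its compare instructions. The 16-bit check below is illustrative rather than the exact policy implemented in this file:

  #include "llvm/Support/MathExtras.h"
  #include <cstdint>
  using namespace llvm;

  bool exampleIsLegalICmpImmediate(int64_t Imm) {
    // Signed and unsigned compares commonly have 16-bit immediate forms.
    return isInt<16>(Imm) || isUInt<16>(Imm);
  }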
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:720
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
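A minimal sketch of the SDValue/SDNode accessors above as used when matching a pattern in a DAG combine: check the opcode, inspect the operands, and require a single use before rewriting:

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  static bool isSingleUseSelfAdd(SDValue V) {
    if (V.getOpcode() != ISD::ADD)
      return false;
    SDValue LHS = V.getOperand(0);
    SDValue RHS = V.getOperand(1);
    // Only worth rewriting when no other node observes the add.
    return LHS == RHS && V.hasOneUse();
  }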
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:500
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:813
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:459
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:839
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:763
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:719
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:808
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:885
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
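A hedged sketch of how the known-bits queries are used in combines: the check below asks whether the upper 48 bits of a 64-bit value are already known to be zero, in which case an explicit AND mask would be redundant (the helper name is illustrative):

  static bool upper48BitsKnownZero(SelectionDAG &DAG, SDValue V) {
    // Mask covering bits 16..63; MaskedValueIsZero answers whether V & mask == 0.
    APInt HighBits = APInt::getHighBitsSet(64, 48);
    return DAG.MaskedValueIsZero(V, HighBits);
  }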
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:777
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:34
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:148
Class to represent struct types.
Definition: DerivedTypes.h:218
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
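The configuration hooks in this group are invoked from a target's TargetLowering constructor. The fragment below is a generic sketch of that pattern with illustrative choices, not the actual PPC configuration performed in this file; it assumes a Subtarget member is in scope:

  // Inside a TargetLowering subclass constructor (illustrative values only):
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);      // i32 is legal in GPRs
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);  // no combined div/rem node
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);     // no truncating FP store
  setBooleanContents(ZeroOrOneBooleanContent);         // boolean results are 0 or 1
  computeRegisterProperties(Subtarget.getRegisterInfo());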
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:311
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
int getNumOccurrences() const
Definition: CommandLine.h:400
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:256
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1236
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1232
@ TargetConstantPool
Definition: ISDOpcodes.h:184
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1265
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1351
@ STRICT_FCEIL
Definition: ISDOpcodes.h:454
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ GlobalAddress
Definition: ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:275
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:505
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:985
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:249
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1309
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:1018
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:431
@ GlobalTLSAddress
Definition: ISDOpcodes.h:89
@ FrameIndex
Definition: ISDOpcodes.h:90
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:957
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ TargetExternalSymbol
Definition: ISDOpcodes.h:185
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1157
@ TargetJumpTable
Definition: ISDOpcodes.h:183
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1331
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1090
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:1002
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1187
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:347
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1166
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1347
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:242
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1261
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:180
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:458
@ GET_ROUNDING
Returns the current rounding mode: -1 = Undefined, 0 = Round to 0, 1 = Round to nearest (ties to even), 2 = Round to ...
Definition: ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1075
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:452
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:453
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1358
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1151
@ ConstantPool
Definition: ISDOpcodes.h:92
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:718
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:960
@ STRICT_FROUND
Definition: ISDOpcodes.h:456
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:477
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:455
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1207
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:908
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1292
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1318
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:941
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1204
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1256
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1180
@ BlockAddress
Definition: ISDOpcodes.h:94
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:360
@ AssertZext
Definition: ISDOpcodes.h:63
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1250
@ STRICT_FRINT
Definition: ISDOpcodes.h:450
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1439
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:713
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1315
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:181
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:543
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1718
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1634
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1685
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1665
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1724
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
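A small sketch of how these predicates gate a DAG combine; the helper is hypothetical, and real combines in this file check additional properties such as alignment and value type:

  static bool isPlainNonVolatileLoad(SDNode *N) {
    // Reject extending and indexed loads up front, then check volatility.
    if (!ISD::isNormalLoad(N))
      return false;
    return !cast<LoadSDNode>(N)->isVolatile();
  }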
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:134
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from a signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ SETBC
SETBC - The ISA 3.1 (P10) SETBC instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ SETBCR
SETBCR - The ISA 3.1 (P10) SETBCR instruction.
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ ADDC
These nodes represent PPC arithmetic operations with carry.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from an unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
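These mask predicates are queried while lowering VECTOR_SHUFFLE. A simplified sketch follows, assuming the 1/2/4-byte element sizes handled by the Altivec splat instructions; the helper name is hypothetical:

  static bool matchesAltivecSplat(ShuffleVectorSDNode *SVN, SelectionDAG &DAG,
                                  unsigned &EltSize, unsigned &SplatIdx) {
    for (unsigned Size : {1u, 2u, 4u})
      if (PPC::isSplatShuffleMask(SVN, Size)) {
        EltSize = Size;
        // Translate the mask position into the index encoding VSPLTB/H/W expect.
        SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVN, Size, DAG);
        return true;
      }
    return false;
  }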
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:65
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:106
@ XTY_ER
External reference.
Definition: XCOFF.h:242
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
constexpr double e
Definition: MathExtras.h:47
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
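A hedged example of why this predicate matters: D-form PPC memory operands carry only a signed 16-bit displacement, so address selection checks the constant operand of an ADD before folding it; the helper name is illustrative:

  static bool addFoldsIntoDForm(SDValue Addr) {
    int16_t Imm = 0;
    return Addr.getOpcode() == ISD::ADD &&
           isIntS16Immediate(Addr.getOperand(1).getNode(), Imm);
  }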
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:157
unsigned M1(unsigned Val)
Definition: VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:159
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:164
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:126
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition: VE.h:376
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:559
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:577
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
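A brief worked example of the contract: 0x00FFFF00 is one contiguous run of 1s, so the call succeeds and MB/ME bound the run (in PPC's mask-bit numbering), while 0x00FF00FF contains two runs and fails:

  unsigned MB, ME;
  bool OneRun = isRunOfOnes(0x00FFFF00u, MB, ME);   // true; MB/ME describe the run
  bool TwoRuns = isRunOfOnes(0x00FF00FFu, MB, ME);  // false; the 1s are not contiguous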
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:280
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:269
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:308
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
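The following sketch exercises several of the EVT queries listed above; it is illustrative only and uses a fresh LLVMContext rather than anything from this file:

  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>

  void evtExample() {
    llvm::LLVMContext Ctx;
    // A 4 x i32 vector: simple, integer, 128 bits wide.
    llvm::EVT VecVT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);
    assert(VecVT.isVector() && VecVT.isInteger() && VecVT.isSimple());
    assert(VecVT.getVectorNumElements() == 4);
    assert(VecVT.getScalarSizeInBits() == 32);
    assert(VecVT.getFixedSizeInBits() == 128);
    // An odd-width integer has no matching MVT, so it is an extended EVT.
    llvm::EVT OddVT = llvm::EVT::getIntegerVT(Ctx, 34);
    assert(OddVT.isExtended() && !OddVT.isSimple());
  }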
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing (from the perspective of the caller) return value virtual register.
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:60
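A minimal sketch of these KnownBits queries (the bit patterns are made up for illustration):

  #include "llvm/Support/KnownBits.h"
  #include <cassert>

  void knownBitsExample() {
    llvm::KnownBits Known(8);
    // Once every bit is covered by either One or Zero, the value is a constant.
    Known.One = llvm::APInt(8, 0x0F);
    Known.Zero = llvm::APInt(8, 0xF0);
    assert(Known.isConstant() && Known.getConstant() == 0x0F);
    // resetAll() forgets everything learned so far.
    Known.resetAll();
    assert(!Known.isConstant());
  }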
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR value.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
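A hedged sketch of how lowering code typically builds these records; MF and FI are assumed to come from the surrounding call-lowering logic, the helper name is made up, and the 4-byte offset is illustrative:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineMemOperand.h"

  static llvm::MachinePointerInfo
  fixedStackSlotPlus4(llvm::MachineFunction &MF, int FI) {
    // Describe a fixed stack object, then a location 4 bytes into it.
    llvm::MachinePointerInfo PtrInfo =
        llvm::MachinePointerInfo::getFixedStack(MF, FI);
    return PtrInfo.getWithOffset(4);
  }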
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call lowering.
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
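A hedged sketch of the builder-style use of CallLoweringInfo when emitting a runtime library call, written as if inside a TargetLowering member function; DAG, dl, Chain, Callee, RetTy and Args are assumed to be prepared by the surrounding code, and setChain/LowerCallTo come from TargetLowering rather than the entries above:

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult();  // the call's integer result is sign-extended
  // LowerCallTo returns {return value, updated chain}.
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);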
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)